載入所需要套件和資料
library(heplots)
library(reshape2)
library(ggplot2)
library(plotly)
library(plyr)
library(moments)
library(nlme)
前36筆是男生,後28筆是女生 給ID 給gender
data(VocabGrowth)
LG <- VocabGrowth
LG$id <- c(1:64)
LG$gender <- factor(c(rep(1,36),rep(2,28)))
head(LG)
## grade8 grade9 grade10 grade11 id gender
## 1 1.75 2.60 3.76 3.68 1 1
## 2 0.90 2.47 2.44 3.43 2 1
## 3 0.80 0.93 0.40 2.27 3 1
## 4 2.42 4.15 4.56 4.21 4 1
## 5 -1.31 -1.31 -0.66 -2.22 5 1
## 6 -1.56 1.67 0.18 2.33 6 1
更動資料排列形狀
LGl <- melt(LG, variable.name ='gradename', value.name = 'score',id=c('id','gender'))
依照gradename重新創一個數字型態的年級變項
LGl$grade <- as.numeric(substr(LGl$gradename, 6, 7))
head(LGl)
## id gender gradename score grade
## 1 1 1 grade8 1.75 8
## 2 2 1 grade8 0.90 8
## 3 3 1 grade8 0.80 8
## 4 4 1 grade8 2.42 8
## 5 5 1 grade8 -1.31 8
## 6 6 1 grade8 -1.56 8
看四個時間點的平均數與變異數(以年級和性別為依據)
aggregate(score ~ grade+gender, data = LGl, mean)
## grade gender score
## 1 8 1 1.141111
## 2 9 1 2.375000
## 3 10 1 2.880278
## 4 11 1 3.536389
## 5 8 2 1.132857
## 6 9 2 2.756071
## 7 10 2 3.127143
## 8 11 2 3.381786
aggregate(score ~ grade+gender, data = LGl, sd)
## grade gender score
## 1 8 1 1.984037
## 2 9 1 2.348258
## 3 10 1 2.397270
## 4 11 1 2.192743
## 5 8 2 1.798256
## 6 9 2 1.705784
## 7 10 2 1.868049
## 8 11 2 1.553820
給性別名字
LGl$gender <- mapvalues(LGl$gender,
from = c(2,1),
to = c("female", "male"))
男女生均值剖面圖
pd<-position_dodge(width = .2)
p1<-ggplot(data = LGl, aes(x = gradename, y = score,group=gender, shape=gender,colour=gender,size=1.5)) +
stat_summary(fun.data = 'mean_cl_boot', size = 3, position = pd) +
stat_summary(fun.y = mean, geom = 'line', aes(group = gender), position = pd,size = 1.2) +
guides(shape = guide_legend(title = '', reverse = TRUE)) +
labs(x = '年級', y = '字彙分數')
ggplotly(p1)
將性別分開分析
LGl_M <- subset(LGl, gender == "male")
LGl_F <- subset(LGl, gender == "female")
峰度偏態、機率密度圖(男生)
aggregate(score ~ grade, data = LGl_M, skewness)
## grade score
## 1 8 1.0361765
## 2 9 0.9958847
## 3 10 0.7435270
## 4 11 0.5666287
aggregate(score ~ grade, data = LGl_M, kurtosis)
## grade score
## 1 8 5.843873
## 2 9 4.092944
## 3 10 3.690133
## 4 11 5.210051
p2<- ggplot(LGl_M, aes(score, fill = gradename)) +
geom_density(alpha = 0.2)+ xlim(-5, 15)+
xlab('字彙分數')
ggplotly(p2)
峰度偏態、機率密度圖(女生)
aggregate(score ~ grade, data = LGl_F, skewness)
## grade score
## 1 8 0.3455779
## 2 9 0.2889140
## 3 10 1.4372096
## 4 11 0.7776863
aggregate(score ~ grade, data = LGl_F, kurtosis)
## grade score
## 1 8 3.394654
## 2 9 2.750384
## 3 10 5.527719
## 4 11 3.703205
p3<- ggplot(LGl_F, aes(score, fill = gradename)) +
geom_density(alpha = 0.2)+ xlim(-5, 15)+
xlab('字彙分數')
ggplotly(p3)
個別資料,畫上迴歸線並配上區間(男生)
p4<-ggplot(data = LGl_M, aes(x = grade, y = score)) +
geom_line(aes(group = id)) +
stat_summary(fun.data = 'mean_cl_boot') +
stat_smooth() +
labs(x = '年級', y = '字彙分數')
ggplotly(p4)
個別資料,畫上迴歸線並配上區間(女生)
p5<-ggplot(data = LGl_F, aes(x = grade, y = score)) +
geom_line(aes(group = id)) +
stat_summary(fun.data = 'mean_cl_boot') +
stat_smooth() +
labs(x = '年級', y = '字彙分數')
ggplotly(p5)
個別迴歸線(男生)
p6<-ggplot(data = LGl_M, aes(x = grade, y = score)) +
stat_smooth(aes(group = id), method = 'lm', se = F, color = "#3399ff",size=0.8,alpha = 0.5) +
geom_point(color ="#ff8080" ,alpha = 0.4,size=2.5) +
labs(x = '年級', y = '字彙分數')
ggplotly(p6)
個別迴歸線(女生)
p6<-ggplot(data = LGl_F, aes(x = grade, y = score)) +
stat_smooth(aes(group = id), method = 'lm', se = F, color = "#3399ff",size=0.8,alpha = 0.5) +
geom_point(color ="#ff8080" ,alpha = 0.4,size=2.5) +
labs(x = '年級', y = '字彙分數')
ggplotly(p6)
性別均值置中
LGl_M$grade_c <- scale(LGl_M$grade, scale = F)
LGl_F$grade_c <- scale(LGl_F$grade, scale = F)
將每個人迴歸線的截距、斜率記下來
m1 <- lmList(score ~ grade_c | id, data = LGl_M)
f1 <- lmList(score ~ grade_c | id, data = LGl_F)
看一下離群值(男生)
p7<-dataEllipse(coef(m1)[, 1], coef(m1)[, 2], pch = 19,levels=c(.68, .95),
col = 'black', id.n=2, xlab = '截距估計值',
ylab = '斜率估計值')
看一下離群值(女生)
p8<-dataEllipse(coef(f1)[, 1], coef(f1)[, 2], pch = 19,levels=c(.68, .95),
col = 'black', id.n=2, xlab = '截距估計值',
ylab = '斜率估計值')
(未完)