讀取變項,整理檔案
# Load the raw WOW data set: a comma-separated file with a header row.
ML_raw <- read.table(
  "D:/104/ML_R/WOW_data.csv",
  header = TRUE,
  sep = ","
)

# Keep only the six analysis columns. They are selected by name (the same
# columns that positions 5, 6, 1, 22, 4, 12 referred to) so the selection
# does not silently change if the CSV column order changes.
ML <- ML_raw[c("senson", "school_3", "X", "reading_1", "gender", "reading_3")]

# attach(ML) was dropped on purpose: every later step reads columns through
# `ML$...` or a `data =` argument, and an attached copy would go stale as
# soon as `ML` is modified further down.
head(ML)
## senson school_3 X reading_1 gender reading_3
## 1 1 206 3 472 0 492
## 2 1 101 15 478 0 494
## 3 1 103 16 500 0 523
## 4 1 104 30 431 1 476
## 5 1 107 64 500 0 515
## 6 1 101 66 429 0 450
以學校為目標計算平均數,並將資料以學校併入
# Per-school means of both reading scores and of `gender`. `gender` is coded
# 0/1 (1 is recoded to "male" later in this script), so its mean is the
# proportion of male students in each school.
ML_M <- setNames(
  aggregate(
    cbind(reading_1, reading_3, gender) ~ school_3,
    data = ML,
    FUN = mean
  ),
  c("school_3", "reading_1_mean", "reading_3_mean", "man_ratio")
)

# Per-school standard deviations of the two reading scores. The grouping
# column keeps its name; only the two score columns are relabelled.
ML_sd <- aggregate(
  cbind(reading_1, reading_3) ~ school_3,
  data = ML,
  FUN = sd
)
names(ML_sd)[c(2, 3)] <- c("reading_1_SD", "reading_3_SD")

head(ML_M)
## school_3 reading_1_mean reading_3_mean man_ratio
## 1 101 474.4444 498.6667 0.4444444
## 2 102 482.3333 505.6667 0.6666667
## 3 103 464.2500 487.7500 0.5000000
## 4 104 469.1250 488.7500 0.6250000
## 5 105 469.0000 492.5000 0.4166667
## 6 106 482.0000 498.8571 0.4285714
載入套件畫圖
library(ggplot2)
library(lattice)
library(plotly)
library(plyr)
將每位學生重新排序,看一下資料
# Reshape the two student-level reading scores into long format:
# `values` holds the score, `ind` records which score column it came from.
ML_r <- stack(ML[c("reading_1", "reading_3")])
head(ML_r)
## values ind
## 1 472 reading_1
## 2 478 reading_1
## 3 500 reading_1
## 4 431 reading_1
## 5 500 reading_1
## 6 429 reading_1
將各校平均分數重新排列
# Reshape the two school-level mean scores into long format:
# `values` holds the school mean, `ind` records which mean it is.
ML_mm <- stack(ML_M[c("reading_1_mean", "reading_3_mean")])
head(ML_mm)
## values ind
## 1 474.4444 reading_1_mean
## 2 482.3333 reading_1_mean
## 3 464.2500 reading_1_mean
## 4 469.1250 reading_1_mean
## 5 469.0000 reading_1_mean
## 6 482.0000 reading_1_mean
直方圖(所有學生)
# Histogram of individual student scores, one dodged bar series per score
# column (`ind`), rendered as an interactive plotly widget.
p1 <- ggplot(data = ML_r, mapping = aes(x = values, fill = ind)) +
  geom_histogram(position = "dodge", binwidth = 0.5) +
  labs(x = "raw_score", y = "count")
ggplotly(p1)
機率分佈(所有學生)
# Kernel density of individual student scores, one semi-transparent curve
# per score column. The y axis is the estimated density.
p2 <- ggplot(data = ML_r, mapping = aes(x = values, fill = ind)) +
  geom_density(alpha = 0.3) +
  labs(x = "raw_score")
ggplotly(p2)
直方圖(所有學校)
# Histogram of the school-level mean scores, one dodged bar series per
# mean column (`ind`).
p3 <- ggplot(data = ML_mm, mapping = aes(x = values, fill = ind)) +
  geom_histogram(position = "dodge", binwidth = 0.5) +
  labs(x = "average score of school", y = "count")
ggplotly(p3)
機率分佈(所有學校)
# Kernel density of the school-level mean scores, one semi-transparent
# curve per mean column.
p4 <- ggplot(data = ML_mm, mapping = aes(x = values, fill = ind)) +
  geom_density(alpha = 0.3) +
  labs(x = "average score of school")
ggplotly(p4)
學生層次相關係數
# Student-level Pearson correlation between the two reading scores.
cor(ML[c("reading_1", "reading_3")])
## reading_1 reading_3
## reading_1 1.0000000 0.8709099
## reading_3 0.8709099 1.0000000
學校層次相關係數(生態相關)
# School-level (ecological) Pearson correlation between the two mean scores;
# the school id and the male-ratio columns are left out.
cor(ML_M[c("reading_1_mean", "reading_3_mean")])
## reading_1_mean reading_3_mean
## reading_1_mean 1.0000000 0.8838805
## reading_3_mean 0.8838805 1.0000000
整理排序(把學生跟學校都放進來,另外加一個type)
# Treat `senson` as a categorical variable from here on.
ML$senson <- as.factor(ML$senson)

# School-level means, renamed to the student-level column names so both
# levels can be stacked into a single data frame.
ML_m1 <- ML_M[c("reading_1_mean", "reading_3_mean")]
names(ML_m1) <- c("reading_1", "reading_3")

# School rows first, then student rows.
ML_m2 <- rbind(ML_m1, ML[c("reading_1", "reading_3")])

# Label each row with its level. The counts come from the data rather than
# being hard-coded (previously 33 schools / 193 students), so the labels
# stay aligned with the rows when the input file changes.
ML_m2$type <- rep(
  c("school", "student"),
  times = c(nrow(ML_m1), nrow(ML))
)
head(ML_m2)
## reading_1 reading_3 type
## 1 474.4444 498.6667 school
## 2 482.3333 505.6667 school
## 3 464.2500 487.7500 school
## 4 469.1250 488.7500 school
## 5 469.0000 492.5000 school
## 6 482.0000 498.8571 school
繪製全部學生及學校的散佈圖+迴歸線
# Scatter plot of reading_1 against reading_3 for school means and
# individual students together, with a separate linear fit (and confidence
# band) for each `type`.
p5 <- ggplot(
  data = ML_m2,
  mapping = aes(x = reading_1, y = reading_3, color = type, shape = type)
) +
  geom_point(size = 2) +
  geom_smooth(aes(fill = type), method = lm, size = 2)
ggplotly(p5)
三年級閱讀成績預測五年級閱讀成績(學校)
# reading_1 predicting reading_3: one grey linear fit per school
# (`group = school_3`), the raw student points, and a single black fit over
# all students (`group = 1`). `se = FALSE` is spelled out instead of `F`,
# which is an ordinary variable that can be reassigned.
p6 <- ggplot(
  data = ML,
  mapping = aes(x = reading_1, y = reading_3, group = school_3)
) +
  stat_smooth(method = "lm", se = FALSE, color = "#999999") +
  geom_point(size = 2.5, color = "#56B4E9") +
  stat_smooth(aes(group = 1), method = "lm", se = FALSE, color = "black") +
  labs(x = "reading_1", y = "reading_3", title = "school")
ggplotly(p6)
三年級閱讀成績預測五年級閱讀成績(senson)
# reading_1 predicting reading_3: one coloured linear fit and point series
# per `senson` level, plus a single black fit over all students
# (`group = 1`). `se = FALSE` is spelled out instead of the reassignable `F`.
# NOTE(review): the manual palette supplies two colours, so this presumably
# expects `senson` to have exactly two levels -- confirm against the data.
p7 <- ggplot(
  data = ML,
  mapping = aes(x = reading_1, y = reading_3, group = senson, shape = senson)
) +
  geom_smooth(aes(color = senson), method = "lm", se = FALSE) +
  geom_point(aes(color = senson), size = 2.5) +
  stat_smooth(aes(group = 1), method = "lm", se = FALSE, color = "black") +
  labs(x = "reading_1", y = "reading_3", title = "senson") +
  scale_colour_manual(values = c("#56B4E9", "#009E73"))
ggplotly(p7)
將自變項以總分置中
# Grand-mean centre the predictor (subtract the overall mean, no rescaling).
# scale() returns a one-column matrix carrying a "scaled:center" attribute;
# as.numeric() strips both so the new column is an ordinary numeric vector.
ML$reading_1_center <- as.numeric(
  scale(ML$reading_1, center = TRUE, scale = FALSE)
)
把senson和學校轉成factor
將性別轉換成文字(0=female,1=male)
# Treat the school id as a categorical variable.
ML$school_3 <- as.factor(ML$school_3)

# Recode the numeric gender codes into text labels: 0 -> "female",
# 1 -> "male".
ML$gender <- plyr::mapvalues(
  ML$gender,
  from = c(0, 1),
  to = c("female", "male")
)
head(ML)
## senson school_3 X reading_1 gender reading_3 reading_1_center
## 1 1 206 3 472 female 492 -5.2642487
## 2 1 101 15 478 female 494 0.7357513
## 3 1 103 16 500 female 523 22.7357513
## 4 1 104 30 431 male 476 -46.2642487
## 5 1 107 64 500 female 515 22.7357513
## 6 1 101 66 429 female 450 -48.2642487
隨機選取學校及區域
# Randomly pick up to 9 schools and up to 2 `senson` levels for the faceted
# plots. The sample size is capped at the number of available levels so the
# draw cannot fail on a smaller data set. No seed is set, so the selection
# differs from run to run.
ns <- sample(levels(ML$school_3), size = min(9, nlevels(ML$school_3)))
nc <- sample(levels(ML$senson), size = min(2, nlevels(ML$senson)))
學校
# For the randomly chosen schools in `ns`: centred reading_1 against
# reading_3, one panel per school, with points and a linear fit per gender.
# `se = FALSE` is spelled out instead of the reassignable `F`.
p8 <- ggplot(
  data = ML[ML$school_3 %in% ns, ],
  mapping = aes(x = reading_1_center, y = reading_3, color = gender)
) +
  geom_point(size = 1) +
  stat_smooth(method = "lm", se = FALSE) +
  facet_wrap(~school_3)
ggplotly(p8)
senson
# For the randomly chosen `senson` levels in `nc`: centred reading_1 against
# reading_3, one panel per level, with points and a linear fit per gender.
# `se = FALSE` is spelled out instead of the reassignable `F`.
p9 <- ggplot(
  data = ML[ML$senson %in% nc, ],
  mapping = aes(x = reading_1_center, y = reading_3, color = gender)
) +
  geom_point(size = 1) +
  stat_smooth(method = "lm", se = FALSE) +
  facet_wrap(~senson)
ggplotly(p9)