# Exercise 1 ----
# Load the data set; the first line of hs0.txt supplies the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
dta <- read.table("hs0.txt", header = TRUE)
# Preview the first rows to check that the columns were parsed as expected.
head(dta)
## id female race ses schtyp prog read write math science socst
## 1 70 male white low public general 57 52 41 47 57
## 2 121 female white middle public vocation 68 59 53 63 61
## 3 86 male white high public general 44 33 54 58 31
## 4 141 male white high public vocation 63 44 47 53 56
## 5 172 male white middle public academic 47 52 57 53 61
## 6 113 male white middle public academic 44 52 51 63 61
library(reshape2)
library(tidyverse)
# Reshape the data to long form: the five exam-score columns are stacked into
# a `variable` (exam name) / `value` (score) pair, and every other column is
# kept as an identifier.
# The score columns are selected by name rather than by position (7:11) so the
# reshape does not silently pick the wrong columns if the file layout changes.
score_cols <- c("read", "write", "math", "science", "socst")
dta_l <- dta %>% melt(measure.vars = score_cols)
head(dta_l)
## id female race ses schtyp prog variable value
## 1 70 male white low public general read 57
## 2 121 female white middle public vocation read 68
## 3 86 male white high public general read 44
## 4 141 male white high public vocation read 63
## 5 172 male white middle public academic read 47
## 6 113 male white middle public academic read 44
# Summarise the scores per exam with summarySE() from {Rmisc}: one row per
# level of `variable`, with N, mean (in `value`), sd, se and ci columns.
dta_lsum <- Rmisc::summarySE(
  data = dta_l,
  measurevar = "value",
  groupvars = "variable"
)
dta_lsum
## variable N value sd se ci
## 1 read 200 52.230 10.252937 0.7249921 1.429653
## 2 write 200 52.775 9.478586 0.6702372 1.321679
## 3 math 200 52.645 9.368448 0.6624493 1.306321
## 4 science 200 NA NA NA NA
## 5 socst 200 52.405 10.735793 0.7591352 1.496982
# The summary cannot be computed for an exam that contains missing scores, so
# drop the observations whose score is missing.
# Only `value` is tested: na.omit() on the whole data frame would also discard
# rows with an NA in any identifier column, which is not what is wanted here.
dta_l <- dta_l[!is.na(dta_l$value), ]
# Recompute the per-exam summary on the complete scores.
dta_lsum <- Rmisc::summarySE(dta_l, measurevar = "value", groupvars = "variable")
dta_lsum
## variable N value sd se ci
## 1 read 200 52.23000 10.252937 0.7249921 1.429653
## 2 write 200 52.77500 9.478586 0.6702372 1.321679
## 3 math 200 52.64500 9.368448 0.6624493 1.306321
## 4 science 195 51.91795 9.796035 0.7015086 1.383563
## 5 socst 200 52.40500 10.735793 0.7591352 1.496982
library(ggplot2)
# Shared dodge so error bars, line and points stay aligned with each other.
pd <- position_dodge(0.1)
# Plot the mean score of each exam with its confidence interval.
# In `dta_lsum`, `value` holds the per-exam mean and `ci` the half-width of
# the interval around it.
ggplot(dta_lsum, aes(x = variable, y = value, color = variable, group = variable)) +
  geom_errorbar(aes(ymax = value + ci, ymin = value - ci), width = .1, position = pd) +
  # The line must join the five means, so it needs a single group; with the
  # inherited `group = variable` every group has one point, nothing is drawn
  # and ggplot2 reports "Each group consists of only one observation".
  # A fixed colour is set because the line spans all exams.
  geom_line(aes(group = 1), colour = "grey50", position = pd) +
  geom_point(position = pd, size = 3, shape = 21, fill = "white") +
  xlab("Exam type") +
  ylab("Mean Scores") +
  ggtitle("Mean differences between different exams")

# One-way ANOVA of score on exam type.
# p value = 0.92: no significant difference among the five exams.
# NOTE(review): each student (id) contributes a score to every exam, so the
# observations are presumably not independent; a repeated-measures model may
# be more appropriate -- TODO confirm.
lm(value ~ variable, data = dta_l) %>%
  anova()
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## variable 4 91 22.805 0.2308 0.9211
## Residuals 990 97817 98.805