Exercise 1

# Read hs0.txt, taking the column names from its first line, and preview the
# first rows. TRUE is spelled out because T is an ordinary variable that can
# be reassigned.
dta <- read.table("hs0.txt", header = TRUE)
head(dta)
##    id female  race    ses schtyp     prog read write math science socst
## 1  70   male white    low public  general   57    52   41      47    57
## 2 121 female white middle public vocation   68    59   53      63    61
## 3  86   male white   high public  general   44    33   54      58    31
## 4 141   male white   high public vocation   63    44   47      53    56
## 5 172   male white middle public academic   47    52   57      53    61
## 6 113   male white middle public academic   44    52   51      63    61
library(reshape2)
library(tidyverse)
# Reshape to long form: one row per student per exam, with the exam name in
# `variable` and the score in `value`. The score columns are selected by name
# rather than by position so a change in column order cannot silently melt
# the wrong variables.
dta_l <- melt(
  dta,
  measure.vars = c("read", "write", "math", "science", "socst")
)
head(dta_l)
##    id female  race    ses schtyp     prog variable value
## 1  70   male white    low public  general     read    57
## 2 121 female white middle public vocation     read    68
## 3  86   male white   high public  general     read    44
## 4 141   male white   high public vocation     read    63
## 5 172   male white middle public academic     read    47
## 6 113   male white middle public academic     read    44
# Summarise the scores per exam with Rmisc::summarySE(): count, mean (reported
# in the `value` column), sd, se and ci.
dta_lsum <- Rmisc::summarySE(
  data = dta_l,
  measurevar = "value",
  groupvars = "variable"
)
print(dta_lsum)
##   variable   N  value        sd        se       ci
## 1     read 200 52.230 10.252937 0.7249921 1.429653
## 2    write 200 52.775  9.478586 0.6702372 1.321679
## 3     math 200 52.645  9.368448 0.6624493 1.306321
## 4  science 200     NA        NA        NA       NA
## 5    socst 200 52.405 10.735793 0.7591352 1.496982
# The science summary is NA because some science scores are missing, so drop
# the rows whose score is missing and summarise again. Only `value` is
# checked: na.omit() on the whole data frame would also throw away valid
# scores from any row that has an NA in another column.
dta_l <- dta_l[!is.na(dta_l$value), ]
dta_lsum <- Rmisc::summarySE(
  dta_l,
  measurevar = "value",
  groupvars = "variable"
)
dta_lsum
##   variable   N    value        sd        se       ci
## 1     read 200 52.23000 10.252937 0.7249921 1.429653
## 2    write 200 52.77500  9.478586 0.6702372 1.321679
## 3     math 200 52.64500  9.368448 0.6624493 1.306321
## 4  science 195 51.91795  9.796035 0.7015086 1.383563
## 5    socst 200 52.40500 10.735793 0.7591352 1.496982
library(ggplot2)
# Plot the mean score of each exam with its confidence interval.
pd <- position_dodge(0.1) # dodge width 0.1, i.e. .05 to the left and right
# `value` in dta_lsum is the per-exam mean; `ci` is the interval half-width.
ggplot(
  dta_lsum,
  aes(x = variable, y = value, color = variable, group = variable)
) +
  geom_errorbar(
    aes(ymax = value + ci, ymin = value - ci),
    width = 0.1,
    position = pd
  ) +
  # The connecting line needs all means in ONE group. Inheriting
  # group = variable leaves one observation per group, so no line is drawn and
  # ggplot2 warns "Each group consists of only one observation".
  geom_line(aes(group = 1), colour = "grey50", position = pd) +
  geom_point(position = pd, size = 3, shape = 21, fill = "white") +
  xlab("Exam type") +
  ylab("Mean Scores") +
  ggtitle("Mean differences between different exams")

# One-way ANOVA of score on exam type. p = 0.92: no significant difference
# among the five exams.
# NOTE(review): every student (id) contributes one score per exam, so the
# observations are not independent; a repeated-measures model may be more
# appropriate -- confirm.
anova(
  lm(value ~ variable, data = dta_l)
)
## Analysis of Variance Table
## 
## Response: value
##            Df Sum Sq Mean Sq F value Pr(>F)
## variable    4     91  22.805  0.2308 0.9211
## Residuals 990  97817  98.805