# Library packages
pkgs <- c("dplyr", "tidyr", "magrittr", "ggplot2", "car", "mlmRev","MASS","WWGbook")
lapply(pkgs, library, character.only = TRUE)
# Question 2
# Work on a copy of the minn38 data set and tabulate two logical conditions
# over its rows.
# NOTE(review): both summaries count rows of the table; if minn38 stores one
# row per category combination with a separate frequency column, confirm
# that row counts (rather than summed frequencies) are what is wanted.
dta2 <- minn38

# 2-1: number of rows whose sex is "F"
with(dta2, summary(sex == "F"))
## Mode FALSE TRUE NA's
## logical 84 84 0

# 2-2: number of rows whose sex is "F" and whose phs is "C"
with(dta2, summary(sex == "F" & phs == "C"))
## Mode FALSE TRUE NA's
## logical 147 21 0
# Question 3
# Correlation between language score (lang) and IQ in the nlschools data,
# first over all rows and then separately within each class.
dta3 <- nlschools
head(dta3)
## lang IQ class GS SES COMB
## 1 46 15.0 180 29 23 0
## 2 45 14.5 180 29 10 0
## 3 33 9.5 180 29 15 0
## 4 46 11.0 180 29 23 0
## 5 20 8.0 180 29 10 0
## 6 30 9.5 180 29 10 0

# 3-1: overall correlation, ignoring class membership
with(dta3, cor(lang, IQ))
## [1] 0.6098195

# 3-2: one correlation per class (one output row per level of `class`)
dta3 %>%
  group_by(class) %>%
  summarise(cor_class = cor(lang, IQ))
## # A tibble: 133 × 2
## class cor_class
## <fctr> <dbl>
## 1 180 0.6533810
## 2 280 0.4268428
## 3 1082 0.9185030
## 4 1280 0.3920230
## 5 1580 0.6622551
## 6 1680 0.5986652
## 7 1880 0.6017628
## 8 2180 0.5808393
## 9 2480 0.6275642
## 10 2680 0.6154951
## # ... with 123 more rows
# Question 4
# Read the verbal-IQ data (whitespace-delimited text with a header row) from
# the course web site and plot language score against verbal IQ.
dta4 <- read.table(
  "http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/verbalIQ.txt",
  header = TRUE
)
str(dta4)
## 'data.frame': 2287 obs. of 6 variables:
## $ school : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pupil : int 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
## $ viq : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ language: int 46 45 33 46 20 30 30 57 36 36 ...
## $ csize : int 29 29 29 29 29 29 29 29 29 29 ...
## $ ses : int 23 10 15 23 10 10 23 10 13 15 ...

# Map columns by bare name: aes() evaluates its arguments inside the plot
# data, so `dta4$viq` would bypass that lookup and break as soon as a layer
# or facet works on a subset of the data.
p4 <- ggplot(dta4, aes(x = viq, y = language))

# Scatter plot with a fitted linear regression line; the plot shows that
# Verbal IQ and Language score are positively correlated.
p4 +
  geom_point() +
  geom_smooth(method = "lm") +
  xlab("Verbal IQ") +
  ylab("Language Score")

# Question 6
# Compare the written/course correlation computed over all students with the
# average of the correlations computed separately within each school.
dta6 <- Gcsemv
head(dta6)
## school student gender written course
## 1 20920 16 M 23 NA
## 2 20920 25 F NA 71.2
## 3 20920 27 F 39 76.8
## 4 20920 31 F 36 87.9
## 5 20920 42 M 16 44.4
## 6 20920 62 F 36 NA

# Correlation over all students, using every row where both scores are present.
cor_all <- with(dta6, cor(written, course, use = "pairwise.complete.obs"))

# One correlation per school. The column is named directly in summarise(),
# and schools whose correlation is undefined (NA) are dropped from cor6
# itself, so the raw data in dta6 is no longer overwritten.
cor6 <- dta6 %>%
  group_by(school) %>%
  summarise(
    correlation = cor(written, course, use = "pairwise.complete.obs")
  ) %>%
  filter(!is.na(correlation))

# Unweighted mean of the school-level correlations.
cor_school <- mean(cor6$correlation, na.rm = TRUE)

# Distribution of school-level correlations with both summary values marked.
hist(
  cor6$correlation,
  breaks = 40,
  col = "cyan",
  main = " ",
  xlab = "Correlation Coefficient",
  ylab = "Count"
)
abline(v = cor_all, col = "red")     # red line: correlation over individuals
abline(v = cor_school, col = "blue") # blue line: mean of school correlations

# Question 7
# Summarise the autism data and draw one vsae-by-age trajectory per child,
# with a separate panel for each value of sicdegp.
dta7 <- autism
summary(dta7)
## age vsae sicdegp childid
## Min. : 2.000 Min. : 1.00 Min. :1.000 Min. : 1.00
## 1st Qu.: 2.000 1st Qu.: 10.00 1st Qu.:1.000 1st Qu.: 48.75
## Median : 4.000 Median : 14.00 Median :2.000 Median :107.50
## Mean : 5.771 Mean : 26.41 Mean :1.956 Mean :105.38
## 3rd Qu.: 9.000 3rd Qu.: 27.00 3rd Qu.:3.000 3rd Qu.:158.00
## Max. :13.000 Max. :198.00 Max. :3.000 Max. :212.00
## NA's :2

# Grouping by childid makes geom_line() connect the points of each child
# separately instead of joining all observations into a single path.
p7 <- ggplot(
  data = dta7,
  mapping = aes(x = age, y = vsae, group = childid)
)

p7 +
  geom_point() +
  geom_line() +
  facet_grid(. ~ sicdegp) +
  labs(x = "Age(years)", y = "VSAE score")
