Library packages

# Packages used by the exercises below. The attach order is kept as-is
# because a package attached later can mask functions from an earlier one.
pkgs <- c(
  "dplyr", "tidyr", "magrittr", "ggplot2",
  "car", "mlmRev", "MASS", "WWGbook"
)
# character.only = TRUE makes library() treat each element as a package name.
lapply(X = pkgs, FUN = library, character.only = TRUE)

Question 2

# Work on a copy of the minn38 data set.
# NOTE(review): these summaries tabulate table rows; if minn38 carries a
# per-row frequency column, row counts are not student counts -- confirm
# which one the question asks for.
dta2 <- minn38

# 2-1: tabulate rows by whether sex is "F".
summary(with(dta2, sex == "F"))
##    Mode   FALSE    TRUE    NA's 
## logical      84      84       0

# 2-2: tabulate rows by whether sex is "F" and phs is "C" at the same time.
summary(with(dta2, sex == "F" & phs == "C"))
##    Mode   FALSE    TRUE    NA's 
## logical     147      21       0

Question 3

# Work on a copy of the nlschools data set and preview its first rows.
dta3 <- nlschools
head(dta3)
##   lang   IQ class GS SES COMB
## 1   46 15.0   180 29  23    0
## 2   45 14.5   180 29  10    0
## 3   33  9.5   180 29  15    0
## 4   46 11.0   180 29  23    0
## 5   20  8.0   180 29  10    0
## 6   30  9.5   180 29  10    0

# 3-1: correlation between lang and IQ over all rows.
with(dta3, cor(lang, IQ))
## [1] 0.6098195

# 3-2: the same correlation computed separately within each class.
dta3 %>%
  group_by(class) %>%
  summarise(cor_class = cor(lang, IQ))
## # A tibble: 133 × 2
##     class cor_class
##    <fctr>     <dbl>
## 1     180 0.6533810
## 2     280 0.4268428
## 3    1082 0.9185030
## 4    1280 0.3920230
## 5    1580 0.6622551
## 6    1680 0.5986652
## 7    1880 0.6017628
## 8    2180 0.5808393
## 9    2480 0.6275642
## 10   2680 0.6154951
## # ... with 123 more rows

Question 4

# Read the verbal-IQ data (whitespace-delimited, first line holds the column
# names) from the course server and inspect its structure.
dta4 <- read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/verbalIQ.txt",
                   header = TRUE)
str(dta4)
## 'data.frame':    2287 obs. of  6 variables:
##  $ school  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ pupil   : int  17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
##  $ viq     : num  15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
##  $ language: int  46 45 33 46 20 30 30 57 36 36 ...
##  $ csize   : int  29 29 29 29 29 29 29 29 29 29 ...
##  $ ses     : int  23 10 15 23 10 10 23 10 13 15 ...

# Map columns by bare name: `dta4$viq` inside aes() bypasses the `data`
# argument, so the mapping would not follow the data through facets or a
# swapped-in data frame.
p4 <- ggplot(dta4, aes(x = viq, y = language))

# Scatter plot with a fitted straight line; the plot shows that verbal IQ and
# language score are positively correlated.
p4 +
  geom_point() +
  geom_smooth(method = "lm") +
  xlab("Verbal IQ") +
  ylab("Language Score")

Question 6

# Work on a copy of the Gcsemv data set and preview its first rows.
dta6 <- Gcsemv
head(dta6)
##   school student gender written course
## 1  20920      16      M      23     NA
## 2  20920      25      F      NA   71.2
## 3  20920      27      F      39   76.8
## 4  20920      31      F      36   87.9
## 5  20920      42      M      16   44.4
## 6  20920      62      F      36     NA

# Correlation between written and course scores over all students, using only
# the rows where both scores are present.
cor_all <- with(dta6, cor(written, course, use = "pairwise.complete.obs"))

# The same correlation computed within each school, named directly in
# summarise() instead of being renamed afterwards.
cor6 <- dta6 %>%
  group_by(school) %>%
  summarise(correlation = cor(written, course, use = "pairwise.complete.obs"))

# Drop schools whose correlation could not be computed. The result is stored
# back in cor6; the original code assigned it to dta6, which overwrote the raw
# data and left cor6 unfiltered.
cor6 <- na.omit(as.data.frame(cor6))

# Mean of the per-school correlations.
cor_school <- mean(cor6$correlation, na.rm = TRUE)

# Distribution of the per-school correlations with both reference values.
hist(cor6$correlation, breaks = 40, col = "cyan", main = " ",
     xlab = "Correlation Coefficient", ylab = "Count")
abline(v = cor_all, col = "red")     # red line: correlation over all students
abline(v = cor_school, col = "blue") # blue line: mean per-school correlation

Question 7

# Work on a copy of the autism data set and summarise every column.
dta7 <- autism
summary(dta7)
##       age              vsae           sicdegp         childid      
##  Min.   : 2.000   Min.   :  1.00   Min.   :1.000   Min.   :  1.00  
##  1st Qu.: 2.000   1st Qu.: 10.00   1st Qu.:1.000   1st Qu.: 48.75  
##  Median : 4.000   Median : 14.00   Median :2.000   Median :107.50  
##  Mean   : 5.771   Mean   : 26.41   Mean   :1.956   Mean   :105.38  
##  3rd Qu.: 9.000   3rd Qu.: 27.00   3rd Qu.:3.000   3rd Qu.:158.00  
##  Max.   :13.000   Max.   :198.00   Max.   :3.000   Max.   :212.00  
##                   NA's   :2

# One trajectory per child: grouping by childid makes geom_line() connect only
# the observations that belong to the same child.
p7 <- ggplot(
  data = dta7,
  mapping = aes(x = age, y = vsae, group = childid)
)

# Points plus connecting lines, one panel (column) per sicdegp level.
p7 +
  geom_point() +
  geom_line() +
  facet_grid(. ~ sicdegp) +
  labs(x = "Age(years)", y = "VSAE score")