# Load packages ----
pkgs <- c("dplyr", "tidyr", "magrittr", "ggplot2", "car", "mlmRev")
lapply(pkgs, library, character.only = TRUE)
# Q2 ----
# Load the minn38 data set from MASS and keep a working copy in `dat`
data("minn38", package = "MASS")
dat <- minn38
# Inspect the structure of the data
str(dat)
## 'data.frame': 168 obs. of 5 variables:
## $ hs : Factor w/ 3 levels "L","M","U": 1 1 1 1 1 1 1 1 1 1 ...
## $ phs: Factor w/ 4 levels "C","E","N","O": 1 1 1 1 1 1 1 3 3 3 ...
## $ fol: Factor w/ 7 levels "F1","F2","F3",..: 1 2 3 4 5 6 7 1 2 3 ...
## $ sex: Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ f : int 87 72 52 88 32 14 20 3 6 17 ...
# 2-1: number of rows for each level of sex
# NOTE(review): column f looks like a per-row frequency; if so, table() counts
# table cells rather than individuals -- confirm whether a count weighted by f
# (e.g. xtabs(f ~ sex, data = dat)) is what the question asks for.
table(dat[["sex"]]) #answer
##
## F M
## 84 84
# 2-2: the same row count, restricted to rows where phs is "C"
table(dat$sex[dat$phs == "C"]) #answer
##
## F M
## 21 21
# Q3 ----
# Load the nlschools data set from MASS and keep a working copy in `dat`
data("nlschools", package = "MASS")
dat <- nlschools
# Individual level: correlation between IQ and lang over all rows
IQ.ind <- cor(dat$IQ, dat$lang) #answer
IQ.ind
## [1] 0.6098195
# Class level: the same correlation computed separately within each class
IQ.class <- dat %>%
  group_by(class) %>%
  summarise(cor(IQ, lang)) #answer
IQ.class
## # A tibble: 133 x 2
## class cor(IQ, lang)
## <fctr> <dbl>
## 1 180 0.6533810
## 2 280 0.4268428
## 3 1082 0.9185030
## 4 1280 0.3920230
## 5 1580 0.6622551
## 6 1680 0.5986652
## 7 1880 0.6017628
## 8 2180 0.5808393
## 9 2480 0.6275642
## 10 2680 0.6154951
## # ... with 123 more rows
# Q4 ----
# Read the data into `dat` first, e.g. with
# read.table("whatever/the/directory/mydata.txt").
# NOTE(review): nothing in this section assigns `dat`; unless the file is read
# manually, `dat` still holds whatever the previous section left in it and the
# plot below will not find viq, language or ses.
str(dat) # Take a look at the data
## 'data.frame': 2287 obs. of 6 variables:
## $ school : int 1 1 1 1 1 1 1 1 1 1 ...
## $ pupil : int 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
## $ viq : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ language: int 46 45 33 46 20 30 30 57 36 36 ...
## $ csize : int 29 29 29 29 29 29 29 29 29 29 ...
## $ ses : int 23 10 15 23 10 10 23 10 13 15 ...
# Scatter plot of language against viq, coloured by ses on a blue-to-green
# gradient, with a red line through the mean language score at each viq value
ggplot(dat, aes(x = viq, y = language, color = ses)) +
  scale_color_gradient(low = "blue", high = "green") +
  geom_point(alpha = .5) +
  # `fun` and `linewidth` replace the deprecated `fun.y` and line `size`
  stat_summary(fun = mean, geom = "line", linewidth = 0.7, color = "red")

# Q6 ----
# Load the Gcsemv data set from mlmRev and keep a working copy in `dat`
data("Gcsemv", package = "mlmRev")
dat <- Gcsemv
# Drop every row that has a missing value in any column
dat <- na.omit(dat)
# Correlation between written and course, computed separately for each school.
# The summary column is left unnamed, so dplyr labels it `cor(written, course)`.
dat_cor <- summarise(group_by(dat, school), cor(written, course))
# Reference values marked on the histogram; computed once and reused below
cor_school_mean <- mean(dat_cor[[2]], na.rm = TRUE)
cor_individual <- cor(dat$written, dat$course)
# Histogram of the per-school correlations with two vertical reference lines.
# The labels use annotate() rather than geom_text(aes(...)): with constant
# aesthetics geom_text() would draw the same label once per row of dat_cor,
# stacking many copies of the text on top of each other.
ggplot(dat_cor, aes(x = `cor(written, course)`)) +
  geom_histogram(binwidth = .1, fill = "skyblue") +
  geom_vline(xintercept = cor_school_mean, linetype = "dotted", color = "red") +
  annotate("text", x = cor_school_mean, y = 15,
           label = "Averaged correlations over schools",
           angle = 90, vjust = -1, hjust = 1.1, color = "red") +
  geom_vline(xintercept = cor_individual) +
  annotate("text", x = cor_individual, y = 15,
           label = "Correlations computed over individuals",
           angle = 90, vjust = 1, hjust = 1.1) +
  labs(x = "Correlation coefficient")

# Q7 ----
# Attach WWGbook and load its autism data set
library(WWGbook)
data("autism", package = "WWGbook")
# Keep a working copy in `dat` and inspect its structure
dat <- autism
str(dat)
## 'data.frame': 612 obs. of 4 variables:
## $ age : int 2 3 5 9 13 2 3 5 9 13 ...
## $ vsae : int 6 7 18 25 27 17 18 12 18 24 ...
## $ sicdegp: int 3 3 3 3 3 3 3 3 3 3 ...
## $ childid: int 1 1 1 1 1 3 3 3 3 3 ...
# vsae against age: points joined by one line per childid, with one panel
# column per value of sicdegp
ggplot(dat, aes(age, vsae, group = childid)) +
  geom_point(size = 1) +
  geom_line() +
  labs(x = "Age (years)", y = "VSAE score") +
  facet_grid(. ~ sicdegp)
