0413 base graphic HW 3,5
HW 3
loading data and check data structure
## lang IQ class GS SES COMB
## 1 46 15.0 180 29 23 0
## 2 45 14.5 180 29 10 0
## 3 33 9.5 180 29 15 0
## 4 46 11.0 180 29 23 0
## 5 20 8.0 180 29 10 0
## 6 30 9.5 180 29 10 0
## 'data.frame': 2287 obs. of 6 variables:
## $ lang : int 46 45 33 46 20 30 30 57 36 36 ...
## $ IQ : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ class: Factor w/ 133 levels "180","280","1082",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ GS : int 29 29 29 29 29 29 29 29 29 29 ...
## $ SES : int 23 10 15 23 10 10 23 10 13 15 ...
## $ COMB : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
data manipulation
library(dplyr)
dta.1<-dta%>%dplyr::group_by(class)%>%
summarize(count=n())%>%filter(count>30) # find out class with over 30 pupils
dta.over30<- dta[dta$class%in%dta.1$class,] # extract those class with over 30 pupils from the full dataset
dta2<-split(dta.over30, as.character(dta.over30$class)) # to split the data to list by classplot
par(mfrow=c(3, 2), mar=c(2, 2, 2, 2), oma=c(5, 5, 5, 5))
lapply(dta2, function(x) { hist(x$IQ,
xlab="IQ",
main=" ",
xlim=c(6, 20), ylim=c(0, 20)) # make x/y axis consistent over 5 panel
legend('topleft', paste("Class", x$class[1], sep=":"), bty='n')})
mtext("Histogram of IQ in class with over 30 pupils", side=3, line=2,
col="Black", outer=TRUE, cex=1.5,font=2) # add common main title in outer marginHW 5
download the zip file online and unzip it
fl<-"http://memory.psych.upenn.edu/files/pubs/Murd62.data.tgz"
download.file(fl,destfile="tmp.tar.gz")
untar("tmp.tar.gz",files="Murd62")
temp<-list.files("Murd62", pattern="*.txt", full.names=TRUE)data manipulation
names1<-paste("V", 1:15)
temp.1<-lapply(temp, read.table, sep="", col.names=names1, fill=T)
temp.2<-lapply(temp.1, function(x) {x <- x[-1201, ]}) # remove 0
name<-c("10-2", "15-2", "20-1", "20-2", "30-1", "40-1")
dat.name<-Map(cbind, temp.2, group=name)
dta.prob <- lapply(temp.2, function(x){ x <- stack(x)
table(x$values) / 1200})
dta.prob2 <- lapply(dta.prob, as.data.frame) # data.frame to use rbind.fill
dta.prob3<-do.call("rbind.fill", dta.prob2)
dta.prob3$site<-(1:nrow(dta.prob3))
site.row<-dta.prob3%>%filter(Var1==88) # find the last variable of every list to find how many frequency in each list
dta.prob3$group<-c(rep("10-2",11), rep("15-2",17), rep( "20-1",21), rep("20-2", 21), rep("30-1", 32), rep("40-1", 43))
dta.prob.m<-dta.prob3[-site.row$site , -c(3)]
# remove outlier
dta.prob.m <- dta.prob.m[!(dta.prob.m$Var1 == 16 & dta.prob.m$group == "15-2"), ]
dta.prob.m <- dta.prob.m[!(dta.prob.m$Var1 == 31 & dta.prob.m$group == "30-1"), ]
dta.prob.m <- dta.prob.m[!(dta.prob.m$Var1 == 41 & dta.prob.m$group == "40-1"), ]
dta.prob.m <- dta.prob.m[!(dta.prob.m$Var1 == 50 & dta.prob.m$group == "40-1"), ]plot
xtick<-seq(0, 40, by=5)
ytick<-seq(.00, 1.00, by=.2)
plot.new()
par(mar=c(2,2,2,3), oma=c(5,5,5,5))
plot(0,type='n', xaxs="i", yaxs="i",
xlim = c(0, 40),
ylim = c(0, 1),
axes = F)
# to revise the ticks label
axis(1, at=seq(0, 40), labels = F)
text(x=xtick, par("usr")[3],
labels = xtick, pos = 1, xpd = TRUE)
axis(2, seq(0, 1, 0.05), labels=F)
text(par("usr")[1], ytick,
labels = ytick, pos = 2, xpd = TRUE)
# I thinks there must be a better soultion instead of depict each line and points
lines(dta.prob.m$Var1[dta.prob.m$group == "10-2"],
dta.prob.m$Freq[dta.prob.m$group == "10-2"], type = "l", pch = 1)
lines(dta.prob.m$Var1[dta.prob.m$group == "15-2"],
dta.prob.m$Freq[dta.prob.m$group == "15-2"], type = "l", pch = 1)
lines(dta.prob.m$Var1[dta.prob.m$group == "20-1"],
dta.prob.m$Freq[dta.prob.m$group == "20-1"], type = "l", pch = 1)
lines(dta.prob.m$Var1[dta.prob.m$group == "20-2"],
dta.prob.m$Freq[dta.prob.m$group == "20-2"], type = "l", pch = 1)
lines(dta.prob.m$Var1[dta.prob.m$group == "30-1"],
dta.prob.m$Freq[dta.prob.m$group == "30-1"], type = "l", pch = 1)
lines(dta.prob.m$Var1[dta.prob.m$group == "40-1"],
dta.prob.m$Freq[dta.prob.m$group == "40-1"], type = "l", pch = 1)
points(dta.prob.m$Var1[dta.prob.m$group == "10-2"],
dta.prob.m$Freq[dta.prob.m$group == "10-2"], pch = 19)
points(dta.prob.m$Var1[dta.prob.m$group == "15-2"],
dta.prob.m$Freq[dta.prob.m$group == "15-2"], pch = 1)
points(dta.prob.m$Var1[dta.prob.m$group == "20-1"],
dta.prob.m$Freq[dta.prob.m$group == "20-1"], pch = 1)
points(dta.prob.m$Var1[dta.prob.m$group == "20-2"],
dta.prob.m$Freq[dta.prob.m$group == "20-2"], pch = 19)
points(dta.prob.m$Var1[dta.prob.m$group == "30-1"],
dta.prob.m$Freq[dta.prob.m$group == "30-1"], pch = 1)
points(dta.prob.m$Var1[dta.prob.m$group == "40-1"],
dta.prob.m$Freq[dta.prob.m$group == "40-1"], pch = 19)
# not sure if there is any function to do this faster
text(5, 0.85, "10-2")
lines(c(8.2, 6), c(0.7, 0.85))
text(14, 0.5, "15-2")
lines(c(12, 13), c(0.56, 0.5))
text(11, 0.4, "20-2")
lines(c(14, 12), c(0.3, 0.4))
text(18, 0.25, "20-1")
lines(c(16, 17), c(0.3, 0.25))
text(25, 0.65, "30-1")
lines(c(28, 26), c(0.55, 0.65))
text(36, 0.65, "40-1")
lines(c(38.5, 37), c(0.6, 0.65))
# label
mtext("PROBABILITY OF RECALL", side=2, outer=TRUE, line=0 )
mtext("SERIAL POSITION", side=1, outer=TRUE, line=0 )