The ChickWeight.R script illustrates how to implement ‘small multiples’ in base graphics given the 4 different diets of the ChickWeight{datasets} example.
Task: Adapt the script to produce a plot of 5 panels in which each panel shows a histogram of IQ for each of 5 classes with over 30 pupils in the nlschools{MASS} dataset.
# input data
<- ChickWeight
dta
# split into 4 list based on the Diet column
<- split(dta, dta$Diet)
dta_diet
# set Graphical Parameters
# mfrow: A vector of the form c(nr, nc). Subsequent figures will be drawn in an nr-by-nc array on the device by columns (mfcol), or rows (mfrow), respectively.
# mar: A numerical vector of the form c(bottom, left, top, right) which gives the number of lines of margin to be specified on the four sides of the plot. The default is c(5, 4, 4, 2) + 0.1.
par(mfrow=c(2, 2), mar=c(4, 4, 1, 1))
# scatter plot
lapply(dta_diet,
function(x) {
plot(x$weight ~ x$Time,
xlab="Time (day)",
ylab="Weight (gm)")
legend('topleft',
paste("Diet", x$Diet[1], sep="="),
bty='n')}
|>
)invisible()
# input data
<- MASS::nlschools
dat1
# split by class
<- split(dat1, dat1$class)
dat1_class
# find the classes with over 30 pupils
<- lapply(dat1_class, function(x) nrow(x[])>30)
pupils30 which(pupils30 == TRUE)
## 5480 15580 15980 16180 18380
## 26 78 80 82 91
# set plot
par(mfrow=c(1, 5), mar=c(4, 4, 1, 1))
# histogram
lapply(dat1_class[c("5480","15580","15980","16180","18380")],
function(x) {
hist(x$IQ,
xlab = "IQ",
ylab = "Number of student",
main = paste("class", x$class[1], sep=" "))
}|>
)invisible()
Deaths per 100,000 from male suicides for 5 age groups and 15 countries are given in the table below. The data set is available as suicides2{HSAUR3}.
Task: Construct a side-by-side box plot of the data across different age groups and comment briefly.
# input data
<- HSAUR3::suicides2
dat2
# column rename
names(dat2) <- c("25-34", "35-44", "45-54", "55-64","65-74")
# wide to long
<- data.table::melt(dat2, variable.name = "Age", value.name="Number") dat2_l
boxplot(Number ~ Age,
data = dat2_l,
horizontal=T,
varwidth = T,
frame = FALSE,
cex.axis = 0.8,
xlab="Deaths per 100,000 from male suicides")
abline(v=seq(0, 100, 10), lty=3, col='gray') # grid
abline(v=mean(dat2_l$Number), lty=2, col="#00AD9A") # mean line
stripchart(Number ~ Age,
data = dat2_l,
add=T,
col='darkgray', pch=1,
cex=.8,
method='jitter')
Task: Use the sat_gpa.txt dataset to replicate the plot below:
<- read.table("sat_gpa.txt", header = T) dat3
# set margin area for annotation
par(mar=c(10, 2, 2, 2))
with(dat3, plot(SAT_No ~ GPA_No,
type="n",
xlim=c(1150, 1400),
ylim=c(2.6, 3.4),
xlab="SAT(V+M)",
ylab="First Year GPA"))
# add pair-line
with(dat3[-3,], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lwd=2, col="black"))
segments(x0=1200, y0=2.85, x1=1312, y1=3.12, lwd=4, col=rgb(0,0,0,.6))
# Point
with(dat3, points(x=SAT_Yes, y=GPA_Yes, pch=21, bg="white", cex=2))
with(dat3, points(x=SAT_No, y=GPA_No, pch=21, bg="black", cex=2))
# Text label of College
with(dat3[-3,], text(SAT_Yes+25, GPA_Yes, labels=College, cex=.8))
## font = 1 is the default text font (neither italic nor bold), font = 2 specifies bold face text, font = 3 specifies italic text, and font = 4 specifies both bold and italic text.
with(dat3, text(1312+25, 3.12, labels="Bowdoin", cex=.8, font=2))
# add legend
legend("topleft", c("Submitted SAT Scores", "Did NOT Submit SAT Scores"),
pch=c(21, 19),
bg=c("white","black"),
pt.cex=1.5, bty="n")
# set axis and scale but without label
axis(1, seq(1150, 1400, length=16), labels = F)
axis(2, seq(2.6, 3.4, length=21), labels = F)
# add annotation
mtext("Figure 1.4 The mean SAT coupled with the mean first-year GPA for the class \n of 1999 at six schools shown for those who submitted SAT scores for admis- \n sion and those who did not", side=1, cex=1, adj = 0, line=7)
The Law School Admission Test (LSAT) consists of a number of dichotomous items, which can be answered correctly or incorrectly. These items are constructed so that a person who scores high on the test is more likely to do well in a law school. The data set LSAT{ltm} contains answers by 1,000 persons to 5 test items.
Task: Generate a plot similar to the one below:
<- ltm::LSAT
dat1
names(dat1) <- c("Item1", "Item2", "Item3", "Item4","Item5")
<- dat1 |>
dat1L ::mutate(Total = rowSums(dat1,c(1:5)))|>
dplyr::pivot_longer(cols = starts_with("Item"),
tidyrnames_to = "Item",
values_to = "Score")
<- as.data.frame(table(dat1L))
dat1LFreq
<- dat1LFreq |>
dat1final ::group_by(Total, Item)|>
dplyr::mutate(Procorrect=Freq/sum(Freq))|>
dplyrsubset(Total!=c(0,5)& Score==1)
$Total<-as.numeric(as.character(dat1final$Total)) dat1final
plot(0, 0, bty = "n", type = "n",
xlim = c(0, 5),
ylim = c(0, 1),
xlab = "Total score",
ylab = "Proportional Correct")
# point and line
lines(dat1final$Total[dat1final$Item == "Item1"],
$Procorrect[dat1final$Item == "Item1"],
dat1finaltype = "b",
col=1,
pch = 1)
lines(dat1final$Total[dat1final$Item == "Item2"],
$Procorrect[dat1final$Item == "Item2"],
dat1finaltype = "b",
col = 2,
pch = 2)
lines(dat1final$Total[dat1final$Item == "Item3"],
$Procorrect[dat1final$Item == "Item3"],
dat1finaltype = "b",
col = 3,
pch = 3)
lines(dat1final$Total[dat1final$Item == "Item4"],
$Procorrect[dat1final$Item == "Item4"],
dat1finaltype = "b",
col = 4,
pch = 4)
lines(dat1final$Total[dat1final$Item == "Item5"],
$Procorrect[dat1final$Item == "Item5"],
dat1finaltype = "b",
col = 5,
pch = 5)
grid()
legend("topleft",
c("Item 1", "Item 2", "Item 3", "Item 4", "Item 5"),
pch = c(1, 2, 3, 4, 5),
col = c(1, 2, 3, 4, 5),
bty = "n", lty = 1)
Task: Complete the hw_bmi.R script to create the following plot
# height, weight and bmi
# generate weights
<- seq(40, 160, length.out=600)
wt
# generate heights
<- seq(1.40, 2.00, length.out=600)
ht
# cross wt by ht
<- expand.grid(wt=wt, ht=ht)
wtht
# function to compute bmi from wt and ht
<- function(w, h) {w/(h*h)}
bmi_wh
# generate data matrix
<- matrix(bmi_wh(wtht$wt, wtht$ht), length(ht), length(wt)) bmiwtht
# draw the contour
<-c(rgb(1, 1, 1, 0), rgb( 1, 1, 1, 0),rgb(.94, .50, .50, 0.3))
colorbar
contour(wt, ht, bmiwtht, bty="n",
levels = c(18.5, 24.9, 30),
drawlabels = F,
ylab = "Height (m)",
xlab = "Weight (kg)",
main = "BMI categories by height and weight")
# add grid lines
grid()
# annotate the bmi categories
text(105, 1.8, "Obese", cex=1, srt=45)
text(92, 1.8, "Overweight", cex=1, srt=45)
text(75, 1.8, "Normal", cex=1, srt=45)
text(55, 1.8, "Underweight", cex=1, srt=55)
image(wt, ht, bmiwtht, col = colorbar, breaks= c(18.5, 24.9, 30, 100), bty="n",
ylab = "Height (m)",
xlab = "Weight (kg)",
main = "BMI categories by height and weight",
axes = T,
xlim = c(40, 160),
ylim = c(1.4, 2.0),
add = TRUE)
Task: Use the free recall data to replicate the figure below: MURDOCK 1962
The format of the data in these files is serial position as a function of output position. Each row of data corresponds to one subject=trial.
For example, if the data were: 20 19 18 15 1 5 8, that means that the subject recalled item 20, then item 19, etc.
The code 88 means that the subject made an extra-list intrusion
untar("Murd62.data.tgz")
<- read.table("Murd62/fr10-2.txt", sep = "", col.names = c(1:10), fill = T, nrows = 1200)
dat10_2 "Group"] = "10-2"
dat10_2[
<- read.table("Murd62/fr15-2.txt", sep = "", col.names = c(1:15), fill = T)
dat15_2 "Group"] = "15-2"
dat15_2[
<- read.table("Murd62/fr20-1.txt", sep = "", col.names = c(1:20), fill = T)
dat20_1 "Group"] = "20-1"
dat20_1[
<- read.table("Murd62/fr20-2.txt", sep = "", col.names = c(1:20), fill = T)
dat20_2 "Group"] = "20-2"
dat20_2[
<- read.table("Murd62/fr30-1.txt", sep = "", col.names = c(1:30), fill = T)
dat30_1 "Group"] = "30-1"
dat30_1[
<- read.table("Murd62/fr40-1.txt", sep = "", col.names = c(1:40), fill = T)
dat40_1 "Group"] = "40-1"
dat40_1[
<- list(dat10_2, dat15_2, dat20_1, dat20_2, dat30_1, dat40_1)
dat_recall
<- lapply(dat_recall,
dat3 function(x) {y <- x |>
::pivot_longer(cols = starts_with("X"),
tidyrnames_to = "Serial",
values_to = "Item")
<- as.data.frame(table(y$Item)/1200) |>
y ::mutate(Group = x$Group[1])
dplyr|>
} ) ::bind_rows() dplyr
$Group <- factor(dat3$Group)
dat3
# remove "88"
<- dat3[!dat3$Var1 == "88", ]
dat3_re <- dat3_re[!(dat3_re$Var1 == "16" & dat3_re$Group == "15-2"), ]
dat3_re <- dat3_re[!(dat3_re$Var1 == "31" & dat3_re$Group == "30-1"), ]
dat3_re <- dat3_re[!(dat3_re$Var1 == "41" & dat3_re$Group == "40-1"), ]
dat3_re <- dat3_re[!(dat3_re$Var1 == "50" & dat3_re$Group == "40-1"), ]
dat3_re
head(dat3_re)
## Var1 Freq Group
## 1 1 0.7025000 10-2
## 2 2 0.5691667 10-2
## 3 3 0.4750000 10-2
## 4 4 0.4633333 10-2
## 5 5 0.4591667 10-2
## 6 6 0.5675000 10-2
plot(0,0, bty="n", type = "n",xlim = c(0, 40), ylim = c(0, 1),
xlab = "SERIAL POSITION",
ylab = "PROBABILITY OF RECALL",
xaxs = "i", yaxs = "i")
# point and line
points(dat3_re$Var1[dat3_re$Group == "10-2"],
$Freq[dat3_re$Group == "10-2"],
dat3_repch = 19)
lines(dat3_re$Var1[dat3_re$Group == "10-2"],
$Freq[dat3_re$Group == "10-2"])
dat3_re
points(dat3_re$Var1[dat3_re$Group == "15-2"],
$Freq[dat3_re$Group == "15-2"],
dat3_repch = 1)
lines(dat3_re$Var1[dat3_re$Group == "15-2"],
$Freq[dat3_re$Group == "15-2"])
dat3_re
points(dat3_re$Var1[dat3_re$Group == "20-2"],
$Freq[dat3_re$Group == "20-2"],
dat3_repch = 19)
lines(dat3_re$Var1[dat3_re$Group == "20-2"],
$Freq[dat3_re$Group == "20-2"])
dat3_re
points(dat3_re$Var1[dat3_re$Group == "20-1"],
$Freq[dat3_re$Group == "20-1"],
dat3_repch = 1)
lines(dat3_re$Var1[dat3_re$Group == "20-1"],
$Freq[dat3_re$Group == "20-1"])
dat3_re
points(dat3_re$Var1[dat3_re$Group == "30-1"],
$Freq[dat3_re$Group == "30-1"],
dat3_repch = 1)
lines(dat3_re$Var1[dat3_re$Group == "30-1"],
$Freq[dat3_re$Group == "30-1"])
dat3_re
points(dat3_re$Var1[dat3_re$Group == "40-1"],
$Freq[dat3_re$Group == "40-1"],
dat3_repch = 19)
lines(dat3_re$Var1[dat3_re$Group == "40-1"],
$Freq[dat3_re$Group == "40-1"])
dat3_re
# text label
text(2, 0.9, "10-2")
lines(c(2, 6), c(0.85, 0.6))
text(15, 0.63, "15-2")
lines(c(14, 13.5), c(0.65, 0.7))
text(13, 0.48, "20-2")
lines(c(13, 14), c(0.45, 0.3))
text(19, 0.28, "20-1")
lines(c(17.5, 17), c(0.28, 0.36))
text(23, 0.6, "30-1")
lines(c(21.5, 21, 26), c(0.55, 0.52, 0.39))
text(35, 0.8, "40-1")
lines(c(33.5, 33, 37.5), c(0.77, 0.75, 0.59))
# axis
axis(1,seq(0, 40, 2), labels = FALSE)
axis(2,seq(0, 1, 0.1), labels = FALSE)
A dataset, raz2005{rmcorr}, contains two repeated measures, on two occasions (Time), of age (in years) and adjusted volume of cerebellar hemispheres from 72 participants.
Task: Use it to replicate the following figure in which participants older than 65 years of age at Time One are plotted on a shifted panel and those whose cerebellar volume increased from Time One to Time Two are shown in blue color.
# Input
<- rmcorr::raz2005
dat5
# kind of similar to the concept of slope and intercept
<- dat5 |>
dat5_re ::group_by(Participant) |>
dplyr::mutate(DiffA = Age - dplyr::lag(Age),
dplyrDiffV = Volume - dplyr::lag(Volume))
# find out whose cerebellar volume increased from Time 1 to Time 2
as.data.frame(subset(dat5_re, DiffA>0 & DiffV>0))
## Participant Time Age Volume DiffA DiffV
## 1 18 2 62.70207 148.6754 5.780508 10.7698922
## 2 22 2 66.99092 135.3493 4.930349 4.8035595
## 3 29 2 52.53263 125.8514 5.534490 5.8572004
## 4 36 2 50.78699 129.5912 5.897684 0.2437722
## 5 44 2 55.72681 131.1199 5.715749 3.1674448
## 6 62 2 48.99494 137.7438 2.174556 0.2292488
## 7 63 2 52.04664 138.4257 4.304182 1.5300961
# subset data
<- subset(dat5, Participant %in% c(18, 22, 29, 36, 44, 62, 63)) dat5_positive
with(dat5, plot(Age, Volume, bty="n",
xlim = c(20, 90),
ylim = c(100, 170),
xlab = "Age (year)",
ylab = "Cerebellar volume"))
# add grid
grid(nx=NA, ny=NULL)
# arrows for all
arrows(dat5$Age[dat5$Time == 1],
$Volume[dat5$Time == 1],
dat5$Age[dat5$Time == 2],
dat5$Volume[dat5$Time == 2],
dat5length=.08,
col="gray")
# cover the arrows with increased
arrows(dat5_positive$Age[dat5_positive$Time == 1],
$Volume[dat5_positive$Time == 1],
dat5_positive$Age[dat5_positive$Time == 2],
dat5_positive$Volume[dat5_positive$Time == 2],
dat5_positivelength=.08, col="skyblue", lwd = 2)