Run the R script to see what happens. Change the ‘df’ parameter to a slightly larger integer and do it again. What statistical concept does this script illustrate?
x <- seq(-pi*2, 2*pi, .05)
z <- dnorm(x)
plot(x, z, type="l", bty="L", xlab="Standard unit", ylab="Density")
y <- dt(x, df=3) # T分佈:x 所對應的密度:dt(x,df);df:自由度;
polygon(c(x, rev(x)), c(y, rev(z)), col='light gray') # 填色
lines(x, y, col='cadetblue')
y <- dt(x, df=8) # T分佈:x 所對應的密度:dt(x,df);df:自由度;
polygon(c(x, rev(x)), c(y, rev(z)), col='yellow') # 填色
lines(x, y, col='cadetblue') df自由度越高兩條線越契合
Doll (1955) showed per capita consumption of cigarettes in 11 countries in 1930, and the death rates from lung cancer for men in 1950. Use R base graphics to generate the plot shown below. Source: Freedman, et al. (1997). Statistics. pp. 148-150.
m0 <- lm(death ~ consumption, data = dta)
with(dta, plot(consumption, death, type = "n", xlab = " Consumption(per capita per year)", ylab = "Death rate(per million)", xlim = c(min(consumption)*0.9,max(consumption)*1.1),ylim = c(min(death)*0.9,max(death)*1.1)))
with(dta, text(consumption, death, labels = Country, cex = 0.5))
abline(m0, lty =2)
title(main = "Lung cancer and Cigarettes Consumption")Use R base graphics to create the national flag of Denmark
width <- 35
height <- 28
red <- cbind(c(0,width,width,0),c(0,0,height,height))
plot(red, type="n", col="red", axes=F, xlim=c(0,width), ylim=c(0,height), xlab="", ylab="")
polygon(red, density=NULL, col="red", border="red")
whiteH <- cbind(c(0,width,width,0),c(12,12,16,16))
polygon(whiteH, density=NULL, col="white", border="white")
whiteV <- cbind(c(11.35,15.14,15.14,11.35),c(0,0,height,height))
polygon(whiteV, density=NULL, col="white", border="white")Run the R script to see what happens first and then explain how the effect is achieved by the script.
Draw a pie chart to represent 50 shades of gray. Hint: Use ‘?gray’ to examine the gray level specification documented for the gray{grDevices}. Use ‘?pie’ to study the function for making pie charts documented for pie{graphics}.
pie(rep(1, 50), col=gray(seq(0, 1, length=50)),
labels=grey(seq(0, 1, length=50)), clockwise=T, cex=0.5)This R script illustrates how to split the plot region to include histograms on the margins of a scatter diagram using the Galton{HistData} data set. Compile it as a html document with comments on each code chunk.
# Galton's data on the heights of parents and their children
# install.packages("HistData")
# get data
dta <- HistData::Galton
# create matrix,是等一下繪圖的順序(0不畫)
zones <- matrix(c(2, 0, 1, 3), ncol=2, byrow=TRUE)
# 分隔畫布,寬高指分成兩份的比例
layout(zones, widths=c(4/5, 1/5), heights = c(1/5, 4/5))
# parent的histogram,先不畫
xh <- with(dta, hist(parent, plot=FALSE))
# child的histogram,先不畫
yh <- with(dta, hist(child, plot=FALSE))
# 找出兩個直方圖的column最大是多大
ub <- max(c(xh$counts, yh$counts))
# par函數設定繪圖參數
# mar設定圖表邊界距離畫布邊界要留多寬,mar = c(bottom, left, top, right)
par(mar=c(3, 3, 1, 1)) # 在第一個要畫的區域(左下)左邊下面個留三行,右上留一行
# sunflowerplot
with(dta, sunflowerplot(parent, child))
# 在第二個要畫的區域(左上)左邊留三行,右上留一行
par(mar=c(0, 3, 1, 1))
# parent barplot
barplot(xh$counts, axes=FALSE, ylim=c(0, ub), space=0)
# 在第三個要畫的區域(右下)下面留三行,右上留一行
par(mar=c(3, 0, 1, 1))
# child barplot
barplot(yh$counts, axes=FALSE, xlim=c(0, ub), space=0, horiz=TRUE)
# oma設定圖形的外邊界大小,oma = c(bottom,left,to,right)
par(oma=c(3, 3, 0, 0))
# mtext() 在現有的圖形之四個邊緣之一加上文字
# side(1下 2左 3上 4右)
# line文字對應座標軸要再有多少位移
# at自己設座標軸要出現的位置
# adj=0對左或對下對齊,adj=1對右或對上對齊
mtext("Average height of parents (in inch)", side=1, line=2,
outer=TRUE, adj=0,
at=.4 * (mean(dta$parent) - min(dta$parent))/(diff(range(dta$parent))))
mtext("Height of child (in inch)", side=2, line=2,
outer=TRUE, adj=0,
at=.4 * (mean(dta$child) - min(dta$child))/(diff(range(dta$child))))Deaths per 100,000 from male suicides for 5 age groups and 15 countries are given in the table below. The data set is available as suicides2{HSAUR3}. Construct side-by-side box plots for the data from different age groups and comment briefly.
Age
25-34 35-44 45-54 55-64 65-74
Country
Canada 22 27 31 34 24
Israel 9 19 10 14 27
Japan 22 19 21 31 49
Austria 29 40 52 53 69
France 16 25 36 47 56
Germany 28 35 41 49 52
Hungary 48 65 84 81 107
Italy 7 8 11 18 27
Netherlands 8 11 18 20 28
Poland 26 29 36 32 28
Spain 4 7 10 16 22
Sweden 28 41 46 51 35
Switzerland 22 34 41 50 51
UK 10 13 15 17 22
USA 20 22 28 33 37
Source: Everitt, B.S., & Hothorn, T. (2009). A Handbook of Statistical Analyses Using R. 2nd ed. p. 15.
boxplot(Counts ~ Age, data=dtaL, xlab="Age", ylab="Deaths per 100,000 from male suicides")
# indicate grand mean
abline(h=mean(dtaL$Counts), lty=2)The R script illustrates how to implement ‘small multiples’ in base graphics given the 4 different diets of the ChickWeight{datasets} example. Adapt the script to produce a plot of 5 panels in which each panel shows a histogram of IQ for each of 5 classes with over 30 pupils in the nlschools{MASS} dataset.
dta <- ChickWeight
dta_diet <- split(dta, dta$Diet)
par(mfrow=c(2, 2), mar=c(4, 4, 0, 0))
lapply(dta_diet, function(x) {
plot(x$weight ~ x$Time, xlab="Time (day)", ylab="Weight (gm)")
legend('topleft', paste("Diet", x$Diet[1], sep="="), bty='n')}
)dta <- MASS::nlschools
head(dta)
dta_class <- split(dta, dta$class)[25:30]
par(mfrow=c(2, 3), mar=c(3, 3, 3, 3))
lapply(dta_class, function(x) {
hist(x$IQ, xlab="IQ", main = paste("class", x$class[1], sep=" "))}
)Use the dataset to replicate the plot below:
with(dta, plot(SAT_No, GPA_No, xlim=c(1150, 1400), ylim=c(2.6,3.4), xaxs="i", yaxs="i",
xlab="SAT(V+M)", ylab="First Year GPA", font.lab=2, pch=19, cex=2))
with(dta, segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lwd=2, col="black"))
with(dta, segments(1200, 2.85, 1312, 3.12, lwd=3, col="black"))
with(dta, points(SAT_Yes, GPA_Yes, pch=21, bg="white", cex=2))
with(dta, text(1312+26, 3.12, labels='Bowdoin', cex=.8))
with(dta, text(SAT_Yes+26, GPA_Yes, labels=College, cex=.8))
legend("topleft", c("Submitted SAT Scores", "Did NOT Submit SAT Scores"), pch=c(21, 19), bg=c("white","black"), pt.cex=1.5, bty="n")
axis(1,seq(1150, 1400, 50/3), labels = FALSE)
axis(2,seq(2.6, 3.4, 0.2/5), labels = FALSE)Use the free recall data to replicate the figure
reported in Murdock, B. B. (1962). The serial position effect of free recall. Journal of Experimental Psychology, 64, 482-488.
names1 <- paste("X", 1:10)
names2 <- paste("X", 1:15)
dta10.2 <- read.table("Murd62/fr10-2.txt", sep = "", col.names = names1, fill = T, nrows = 1200)
dta10.2['tag'] = '10.2'
dta15.2 <- read.table("Murd62/fr15-2.txt", sep = "", col.names = names2, fill = T)
dta15.2['tag'] = '15.2'
dta20.1 <- read.table("Murd62/fr20-1.txt", sep = "", col.names = names2, fill = T)
dta20.1['tag'] = '20.1'
dta20.2 <- read.table("Murd62/fr20-2.txt", sep = "", col.names = names2, fill = T)
dta20.2['tag'] = '20.2'
dta30.1 <- read.table("Murd62/fr30-1.txt", sep = "", col.names = names2, fill = T)
dta30.1['tag'] = '30.1'
dta40.1 <- read.table("Murd62/fr40-1.txt", sep = "", col.names = names2, fill = T)
dta40.1['tag'] = '40.1'
dta_recall <- list(dta10.2, dta15.2, dta20.1, dta20.2, dta30.1, dta40.1)
dta <- lapply(dta_recall, function(x){y <- stack(x[1:10])
y <- as.data.frame(table(y$values)/1200) %>%
mutate(tag=x[1,"tag"])
}) %>% bind_rows()## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
dta$tag <- factor(dta$tag)
# remove rows have index == 88
dta <- dta[!dta$Var1 == "88", ]
dta <- dta[!(dta$Var1 == 16 & dta$tag == "15.2"), ]
dta <- dta[!(dta$Var1 == 31 & dta$tag == "30.1"), ]
dta <- dta[!(dta$Var1 == 41 & dta$tag == "40.1"), ]
dta <- dta[!(dta$Var1 == 50 & dta$tag == "40.1"), ]
head(dta)# plot(dta$Var1,dta$Freq, type = "b",xlim = c(0, 40), ylim = c(0, 1), xlab = "SERIAL POSITION", ylab = "PROBABILITY OF RECALL", xaxs = "i", yaxs = "i")
plot(0,0, type = "n",xlim = c(0, 40), ylim = c(0, 1), xlab = "SERIAL POSITION", ylab = "PROBABILITY OF RECALL", xaxs = "i", yaxs = "i")
pchl = c(1,19)
for (i in 1:length(levels(dta$tag))) {
points(dta$Var1[dta$tag == levels(dta$tag)[i]],
dta$Freq[dta$tag == levels(dta$tag)[i]],pch=pchl[i%%2+1])
lines(dta$Var1[dta$tag == levels(dta$tag)[i]],
dta$Freq[dta$tag == levels(dta$tag)[i]])
}
axis(1,seq(0, 40, 2), labels = FALSE)
axis(2,seq(0, 1, 0.1), labels = FALSE)
# text label
text(2, 0.9, "10-2")
lines(c(2, 6), c(0.85, 0.6))
text(16, 0.7, "15-2")
lines(c(14, 12.5), c(0.7, 0.7))
text(11, 0.41, "20-2")
lines(c(11, 13), c(0.39, 0.3))
text(19, 0.25, "20-1")
lines(c(19, 16), c(0.27, 0.35))
text(21, 0.55, "30-1")
lines(c(21, 26), c(0.52, 0.39))
text(33, 0.8, "40-1")
lines(c(33, 37.5), c(0.75, 0.59))