The R script illustrates how to implement ‘small multiples’ in base graphics given the 4 different diets of the ChickWeight{datasets} example.
Adapt the script to produce a plot of 5 panels in which each panel shows a histogram of IQ for each of 5 classes with over 30 pupils in the nlschools{MASS} dataset.
#input data
<- ChickWeight
dta head(dta)
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
str(dta)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 578 obs. of 4 variables:
## $ weight: num 42 51 59 64 76 93 106 125 149 171 ...
## $ Time : num 0 2 4 6 8 10 12 14 16 18 ...
## $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
## $ Diet : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Diet
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Time"
## ..$ y: chr "Body weight"
## - attr(*, "units")=List of 2
## ..$ x: chr "(days)"
## ..$ y: chr "(gm)"
typeof(dta)
## [1] "list"
#split()依據4個level分割資料
<- split(dta, dta$Diet) #依據Diet分割資料
dta_diet #head(dta_diet) #看一下分割後的資料
#同時畫好幾張圖時使用par()分割畫面
par(mfrow=c(2, 2), mar=c(4, 4, 0, 0))
## mfrow=c(2, 2)建立2X2的空間
## mar=c(4, 4, 0, 0)=c(bottom, left, top, right)設定圖與四個邊界的距離
# scatter plot
lapply(dta_diet, function(x) {
plot(x$weight ~ x$Time,
xlab="Time (day)",
ylab="Weight (gm)")
legend('topleft', #在左上角做圖例
paste("Diet", x$Diet[1], sep="="),#標註diet的level
bty='n')}) #圖例去掉邊框
## $`1`
## $`1`$rect
## $`1`$rect$w
## [1] 5.821281
##
## $`1`$rect$h
## [1] 52.72942
##
## $`1`$rect$left
## [1] -0.84
##
## $`1`$rect$top
## [1] 315.8
##
##
## $`1`$text
## $`1`$text$x
## [1] 1.151298
##
## $`1`$text$y
## [1] 289.4353
##
##
##
## $`2`
## $`2`$rect
## $`2`$rect$w
## [1] 5.821281
##
## $`2`$rect$h
## [1] 57.02589
##
## $`2`$rect$left
## [1] -0.84
##
## $`2`$rect$top
## [1] 342.68
##
##
## $`2`$text
## $`2`$text$x
## [1] 1.151298
##
## $`2`$text$y
## [1] 314.1671
##
##
##
## $`3`
## $`3`$rect
## $`3`$rect$w
## [1] 5.821281
##
## $`3`$rect$h
## [1] 65.22824
##
## $`3`$rect$left
## [1] -0.84
##
## $`3`$rect$top
## [1] 386.36
##
##
## $`3`$text
## $`3`$text$x
## [1] 1.151298
##
## $`3`$text$y
## [1] 353.7459
##
##
##
## $`4`
## $`4`$rect
## $`4`$rect$w
## [1] 5.821281
##
## $`4`$rect$h
## [1] 55.26824
##
## $`4`$rect$left
## [1] -0.84
##
## $`4`$rect$top
## [1] 333.32
##
##
## $`4`$text
## $`4`$text$x
## [1] 1.151298
##
## $`4`$text$y
## [1] 305.6859
# input data
<-MASS::nlschools
dta#?nlschools
head(dta)
## lang IQ class GS SES COMB
## 1 46 15.0 180 29 23 0
## 2 45 14.5 180 29 10 0
## 3 33 9.5 180 29 15 0
## 4 46 11.0 180 29 23 0
## 5 20 8.0 180 29 10 0
## 6 30 9.5 180 29 10 0
str(dta)
## 'data.frame': 2287 obs. of 6 variables:
## $ lang : int 46 45 33 46 20 30 30 57 36 36 ...
## $ IQ : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ class: Factor w/ 133 levels "180","280","1082",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ GS : int 29 29 29 29 29 29 29 29 29 29 ...
## $ SES : int 23 10 15 23 10 10 23 10 13 15 ...
## $ COMB : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
# rename
::p_load(dplyr, magrittr)
pacman%<>%
dta ::rename(classID=class, ClassSize=GS) dplyr
# split by classID
<- split(dta, dta$classID)
dta_class
# find the classes with over 30 pupils
<- lapply(dta_class, function(x) nrow(x[])>30)
pupils30 which(pupils30 == TRUE)
## 5480 15580 15980 16180 18380
## 26 78 80 82 91
#一直以為"要篩選出有30個以上的學生的班級",要使用GS>=30,結果不是,感到相當困惑
#pacman::p_load(magrittr)
#dta %>% dplyr::filter(GS >= '30')
#dtaGS <- dta[dta$GS >= 30, ]
ClassID=5480的學生人數為26 ?!
# set plot
par(mfrow=c(1, 5), mar=c(4, 4, 1, 1))
# histogram
lapply(dta_class[c("15580","15980","16180","18380")],
function(x) {
hist(x$IQ,
xlab = "IQ",
ylab = "Number of student",
main = paste("class", x$class[1], sep=" "))
}|>
)invisible()
Deaths per 100,000 from male suicides for 5 age groups and 15 countries are given in the table below. The data set is available as suicides2{HSAUR3}. Construct a side-by-side box plot of the data across different age groups and comment briefly.
Source: Everitt, B.S., & Hothorn, T. (2009). A Handbook of Statistical Analyses Using R. 2nd ed. p. 15.
#input data
<-HSAUR3::suicides2
dta2str(dta2)
## 'data.frame': 15 obs. of 5 variables:
## $ A25.34: num 22 9 22 29 16 28 48 7 8 26 ...
## $ A35.44: num 27 19 19 40 25 35 65 8 11 29 ...
## $ A45.54: num 31 10 21 52 36 41 84 11 18 36 ...
## $ A55.64: num 34 14 31 53 47 49 81 18 20 32 ...
## $ A65.74: num 24 27 49 69 56 52 107 27 28 28 ...
head(dta2)
## A25.34 A35.44 A45.54 A55.64 A65.74
## Canada 22 27 31 34 24
## Israel 9 19 10 14 27
## Japan 22 19 21 31 49
## Austria 29 40 52 53 69
## France 16 25 36 47 56
## Germany 28 35 41 49 52
#side-by-side box plot作法參考上課講義p.17
#boxplot(x),x=數值向量或者列表
#rename
names(dta2)<- c("25-34", "35-44", "45-54", "55-64","65-74")
boxplot(dta2)
<-as.data.frame(dta2)
data.frame_dta2str(data.frame_dta2) #結果和dta2一樣
## 'data.frame': 15 obs. of 5 variables:
## $ 25-34: num 22 9 22 29 16 28 48 7 8 26 ...
## $ 35-44: num 27 19 19 40 25 35 65 8 11 29 ...
## $ 45-54: num 31 10 21 52 36 41 84 11 18 36 ...
## $ 55-64: num 34 14 31 53 47 49 81 18 20 32 ...
## $ 65-74: num 24 27 49 69 56 52 107 27 28 28 ...
head(data.frame_dta2)
## 25-34 35-44 45-54 55-64 65-74
## Canada 22 27 31 34 24
## Israel 9 19 10 14 27
## Japan 22 19 21 31 49
## Austria 29 40 52 53 69
## France 16 25 36 47 56
## Germany 28 35 41 49 52
#rename
#使用 names() 函數將dataframe的「變數名稱」以向量的型態輸出
names(data.frame_dta2)<- c("Age25-34", "Age35-44", "Age45-54", "Age55-64","Age65-74")
##Side-By-Side vertical Boxplots
boxplot(data.frame_dta2)
#試試as.data.frame.table
<-as.data.frame.table(dta2)
data.frame.table_dta2str(data.frame.table_dta2)
## 'data.frame': 75 obs. of 7 variables:
## $ Var1 : Factor w/ 15 levels "Canada","Israel",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Var2 : Factor w/ 5 levels "25-34","35-44",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Freq.25.34: num 22 9 22 29 16 28 48 7 8 26 ...
## $ Freq.35.44: num 27 19 19 40 25 35 65 8 11 29 ...
## $ Freq.45.54: num 31 10 21 52 36 41 84 11 18 36 ...
## $ Freq.55.64: num 34 14 31 53 47 49 81 18 20 32 ...
## $ Freq.65.74: num 24 27 49 69 56 52 107 27 28 28 ...
head(data.frame.table_dta2)
## Var1 Var2 Freq.25.34 Freq.35.44 Freq.45.54 Freq.55.64 Freq.65.74
## 1 Canada 25-34 22 27 31 34 24
## 2 Israel 25-34 9 19 10 14 27
## 3 Japan 25-34 22 19 21 31 49
## 4 Austria 25-34 29 40 52 53 69
## 5 France 25-34 16 25 36 47 56
## 6 Germany 25-34 28 35 41 49 52
#rename
names(data.frame.table_dta2)<- c("Country","Age_group","Age25-34", "Age35-44", "Age45-54", "Age55-64","Age65-74")
#Side-By-Side Horizontal Boxplots
boxplot(data.frame.table_dta2[,3:7],
horizontal=T, #T=水平放置box-plot
varwidth=T, #T=box的寬度與樣本量的平方根成比例
col = colors(), #也可以指定喜歡的顏色col="Khaki"
cex.axis=.8, #放大座標標記
pch = 4, #離群點的type
lwd = 1.2, #線的寬度
main="Distribution of Male Suicides",
xlab="Deaths per 100,000 from male suicides",
ylab="Age groups")
abline(v=seq(10, 100, 10),lty=3, #畫垂直輔助線
col='gray')
#顏色更換參考:https://htmlcolorcodes.com/colors/
結論:每10萬名男性平均自殺人數有隨著年齡增加的趨勢。
#講義p.10是用plot()來畫boxplot
#好奇轉成data.frame.table是否也能有相同效果,結果不行....
plot(data.frame.table_dta2[,3:7])
Use the dataset to replicate the plot below: Source: Wainer, H. (2001). Uneducated Guesses. Using Evidence to Uncover Misguided Education Policies.
# input data
<- read.table("C:/Users/Ching-Fang Wu/Documents/dataM/sat_gpa.txt", h=T)
dta3
head(dta3)
## College SAT_No GPA_No SAT_Yes GPA_Yes
## 1 Barnard 1210 3.08 1317 3.30
## 2 Northwestern 1243 3.10 1333 3.24
## 3 Bowdoin 1200 2.85 1312 3.12
## 4 Colby 1220 2.90 1280 3.04
## 5 Carnegie Mellon 1237 2.70 1308 2.94
## 6 Georgia Tech 1233 2.62 1287 2.80
str(dta3)
## 'data.frame': 6 obs. of 5 variables:
## $ College: chr "Barnard" "Northwestern" "Bowdoin" "Colby" ...
## $ SAT_No : int 1210 1243 1200 1220 1237 1233
## $ GPA_No : num 3.08 3.1 2.85 2.9 2.7 2.62
## $ SAT_Yes: int 1317 1333 1312 1280 1308 1287
## $ GPA_Yes: num 3.3 3.24 3.12 3.04 2.94 2.8
# Segments作法參考講義p.36-37
#第一步畫起始點
with(dta3, plot(SAT_No, GPA_No))
#第二步畫終點
with(dta3, plot(SAT_Yes, GPA_Yes))
#第三步將兩點連線
with(dta3, segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes))
接下來進行優化,並調整X軸Y軸尺度。
#第一步畫起始點(實心點)
with(dta3, plot(SAT_No, GPA_No,
type="p", #p:畫點,l:畫線,n:不畫任何點與線
pch=19, #16、19實心點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#第一步畫起始點
with(dta3, plot(SAT_No, GPA_No,
type="p", #p:畫點,l:畫線,n:不畫任何點與線
pch=19, #16、19實心點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#第二步畫終點(空心點)
with(dta3, plot(SAT_Yes, GPA_Yes,
type="p", #p:畫點,l:畫線,n:不畫任何點與線
pch=1))
with(dta3, plot(SAT_No, GPA_No,
type="p", #p:畫點,l:畫線,n:不畫任何點與線
pch=19, #16、19實心點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
with(dta3, plot(SAT_Yes, GPA_Yes,
type="p", #p:畫點,l:畫線,n:不畫任何點與線
pch=1, #空心點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
不懂為何點圖沒有疊加上去,而是變成兩張圖?
#第一步先畫框
with(dta3, plot(SAT_No, GPA_No,
type="n", #p:畫點,l:畫線,n:不畫任何點與線
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#第一步先畫框
with(dta3, plot(SAT_No, GPA_No,
type="n", #p:畫點,l:畫線,n:不畫任何點與線
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#第二步畫實心點和空心點
with(dta3, plot(SAT_No, GPA_No,
type="p", #p:畫點,l:畫線,n:不畫任何點與線
pch=19))
#畫空心點
with(dta3, plot(SAT_Yes, GPA_Yes,
type="p", #p:畫點,l:畫線,n:不畫任何點與線
pch=1)) #空心點
我不懂…為何還是無法在同一張圖上!?
以下循著昶濬及婉禎的作法練習操作一次。
par(pty="s",mgp=c(2,1,0))
par(mar=c(3,3,0,0)+0.2)
par(oma=c(3,0,3,0))
with(dta3, plot(SAT_No, GPA_No,
type="n", #不要呈現點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#先畫線
#先畫"Bowdoin",不然"Colby"會被蓋掉
with(dta3[c(3),], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=2, col="grey"))
with(dta3[-3,], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=1, col="black"))
par(pty="s",mgp=c(2,1,0))
par(mar=c(3,3,0,0)+0.2)
par(oma=c(3,0,3,0))
with(dta3, plot(SAT_No, GPA_No,
type="n", #不要呈現點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#先畫線
#先畫"Bowdoin",不然"Colby"會被蓋掉
with(dta3[c(3),], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=2, col="grey"))
with(dta3[-3,], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=1, col="black"))
#再畫點
with(dta3, points(SAT_No, GPA_No, pch=21, bg="black", cex=1.5))
with(dta3, points(SAT_Yes, GPA_Yes, pch=21, bg="white", cex=1.5))
par(pty="s",mgp=c(2,1,0))
par(mar=c(3,3,0,0)+0.2)
par(oma=c(3,0,3,0))
with(dta3, plot(SAT_No, GPA_No,
type="n", #不要呈現點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#先畫線
#先畫"Bowdoin",不然"Colby"會被蓋掉
with(dta3[c(3),], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=2, col="grey"))
with(dta3[-3,], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=1, col="black"))
#再畫點
with(dta3, points(SAT_No, GPA_No, pch=21, bg="black", cex=1.5))
with(dta3, points(SAT_Yes, GPA_Yes, pch=21, bg="white", cex=1.5))
# Add text at coordinates
## dta3[-3,]不要畫"Bowdoin", cex為text size
with(dta3[-3,], text(SAT_Yes+44, GPA_Yes, labels=College, cex=.8))
# 透過font讓Bowdoin變粗(1=plain, 2=bold, 3=italic, 4=bold-italic)
with(dta3, text(1312+44, 3.12, labels="Bowdoin", font = 2, cex=.8))
par(pty="s",mgp=c(2,1,0))
par(mar=c(3,3,0,0)+0.2)
par(oma=c(3,0,3,0))
with(dta3, plot(SAT_No, GPA_No,
type="n", #不要呈現點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#先畫線
#先畫"Bowdoin",不然"Colby"會被蓋掉
with(dta3[c(3),], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=2, col="grey"))
with(dta3[-3,], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=1, col="black"))
#再畫點
with(dta3, points(SAT_No, GPA_No, pch=21, bg="black", cex=1.5))
with(dta3, points(SAT_Yes, GPA_Yes, pch=21, bg="white", cex=1.5))
# Add text at coordinates
## dta3[-3,]不要畫"Bowdoin", cex為text size
with(dta3[-3,], text(SAT_Yes+44, GPA_Yes, labels=College, cex=.8))
# 透過font讓Bowdoin變粗(1=plain, 2=bold, 3=italic, 4=bold-italic)
with(dta3, text(1312+44, 3.12, labels="Bowdoin", font = 2, cex=.8))
#legend
legend("topleft", c("Submitted SAT Scores", "Did NOT Submit SAT Scores"),
pch=c(21,19),
bg=c("white","black"),
pt.cex=1, bty="n", cex = .8)
par(pty="s",mgp=c(2,1,0))
par(mar=c(3,3,0,0)+0.2)
par(oma=c(3,0,3,0))
with(dta3, plot(SAT_No, GPA_No,
type="n", #不要呈現點
xlim=c(1150, 1400),
ylim=c(2.6,3.4),
xlab="SAT (V+M)",
ylab="First Year GPA"),
cex.axis=1, cex.lab=1)
#先畫線
#先畫"Bowdoin",不然"Colby"會被蓋掉
with(dta3[c(3),], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=2, col="grey"))
with(dta3[-3,], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=1, col="black"))
#再畫點
with(dta3, points(SAT_No, GPA_No, pch=21, bg="black", cex=1.5))
with(dta3, points(SAT_Yes, GPA_Yes, pch=21, bg="white", cex=1.5))
# Add text at coordinates
## dta3[-3,]不要畫"Bowdoin", cex為text size
with(dta3[-3,], text(SAT_Yes+44, GPA_Yes, labels=College, cex=.8))
# 透過font讓Bowdoin變粗(1=plain, 2=bold, 3=italic, 4=bold-italic)
with(dta3, text(1312+44, 3.12, labels="Bowdoin", font = 2, cex=.8))
#legend
legend("topleft", c("Submitted SAT Scores", "Did NOT Submit SAT Scores"),
pch=c(21,19),
bg=c("white","black"),
pt.cex=1, bty="n", cex = .8)
# axis
axis(1, seq(1150,1400, length=16), labels=F)
axis(2, seq(2.6,3.4, length=21), labels=F)
# annotation
mtext("Figure 1.4 The mean SAT coupled with the mean first-year GPA for the class of \n 1999 at six schools shown for those who submitted SAT scores for admis- \n sion and those who did not", side=1, cex=.8, adj = 0, line=2, outer = T)