inclass1

The R script illustrates how to implement ‘small multiples’ in base graphics given the 4 different diets of the ChickWeight{datasets} example. Adapt the script to produce a plot of 5 panels in which each panel shows a histogram of IQ for each of 5 classes with over 30 pupils in the nlschools{MASS} dataset.

ChickWeight{datasets} example

# input data
dta <- ChickWeight
# split data by different Diet group
dta_diet <- split(dta, dta$Diet)
# 設定圖output的呈現樣式
## mfrow=c(row,column)設定呈現之行列數
## mar=c(bottom,left,top,right)設定圖距離邊緣的位置
## x$Diet[1]只呈現第一個，若沒寫[1]會出現一排Diet=1.....
par(mfrow=c(2,2), mar=c(2,2,2,2))
lapply(dta_diet, function(x){
  plot(x$weight ~ x$Time,
       xlab="Time (day)",
       ylab="Weight (gm)")
  legend('topleft',
         paste("Diet", x$Diet[1], sep="="),
         bty='n')
})

$`1`
$`1`$rect
$`1`$rect$w
[1] 4.948786

$`1`$rect$h
[1] 115.8029

$`1`$rect$left
[1] -0.84

$`1`$rect$top
[1] 315.8


$`1`$text
$`1`$text$x
[1] 0.8528419

$`1`$text$y
[1] 257.8985



$`2`
$`2`$rect
$`2`$rect$w
[1] 4.948786

$`2`$rect$h
[1] 125.2387

$`2`$rect$left
[1] -0.84

$`2`$rect$top
[1] 342.68


$`2`$text
$`2`$text$x
[1] 0.8528419

$`2`$text$y
[1] 280.0607



$`3`
$`3`$rect
$`3`$rect$w
[1] 4.948786

$`3`$rect$h
[1] 143.2525

$`3`$rect$left
[1] -0.84

$`3`$rect$top
[1] 386.36


$`3`$text
$`3`$text$x
[1] 0.8528419

$`3`$text$y
[1] 314.7338



$`4`
$`4`$rect
$`4`$rect$w
[1] 4.948786

$`4`$rect$h
[1] 121.3786

$`4`$rect$left
[1] -0.84

$`4`$rect$top
[1] 333.32


$`4`$text
$`4`$text$x
[1] 0.8528419

$`4`$text$y
[1] 272.6307

nlschools{MASS} dataset

# input data
dta1 <- MASS::nlschools
# split data by different class group
dta_class <- split(dta1, dta1$class)
# find the class with pupils>30
pupils30<-lapply(dta_class, function(x) nrow(x)>30) 
which(pupils30==TRUE)

 5480 15580 15980 16180 18380 
   26    78    80    82    91

# 設定圖output的呈現樣式
## mfrow=c(row,column)設定呈現之行列數
## mar=c(bottom,left,top,right)設定圖距離邊緣的位置
par(mfrow=c(1,5), mar=c(4,4,1,1))
lapply(dta_class[c("5480","15580","15980","16180","18380")], 
       function(x){
         hist(x$IQ,
              xlab="IQ",
              ylab="Number of student",
         main = paste("class", x$class[1], sep = "="))
})

$`5480`
$breaks
[1] 10 11 12 13 14 15 16 17 18

$counts
[1] 4 9 8 3 2 3 1 1

$density
[1] 0.12903226 0.29032258 0.25806452 0.09677419 0.06451613 0.09677419 0.03225806
[8] 0.03225806

$mids
[1] 10.5 11.5 12.5 13.5 14.5 15.5 16.5 17.5

$xname
[1] "x$IQ"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

$`15580`
$breaks
[1]  8  9 10 11 12 13 14 15

$counts
[1] 4 2 7 7 7 4 2

$density
[1] 0.12121212 0.06060606 0.21212121 0.21212121 0.21212121 0.12121212 0.06060606

$mids
[1]  8.5  9.5 10.5 11.5 12.5 13.5 14.5

$xname
[1] "x$IQ"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

$`15980`
$breaks
 [1]  9 10 11 12 13 14 15 16 17 18

$counts
[1] 3 7 7 7 4 1 0 1 1

$density
[1] 0.09677419 0.22580645 0.22580645 0.22580645 0.12903226 0.03225806 0.00000000
[8] 0.03225806 0.03225806

$mids
[1]  9.5 10.5 11.5 12.5 13.5 14.5 15.5 16.5 17.5

$xname
[1] "x$IQ"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

$`16180`
$breaks
[1]  8 10 12 14 16 18

$counts
[1]  2 15 11  2  1

$density
[1] 0.03225806 0.24193548 0.17741935 0.03225806 0.01612903

$mids
[1]  9 11 13 15 17

$xname
[1] "x$IQ"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

$`18380`
$breaks
[1]  6  8 10 12 14 16

$counts
[1]  1  2 20  7  1

$density
[1] 0.01612903 0.03225806 0.32258065 0.11290323 0.01612903

$mids
[1]  7  9 11 13 15

$xname
[1] "x$IQ"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

inclass2

Deaths per 100,000 from male suicides for 5 age groups and 15 countries are given in the table below. The data set is available as suicides2{HSAUR3}. Construct a side-by-side box plot of the data across different age groups and comment briefly. Source: Everitt, B.S., & Hothorn, T. (2009). A Handbook of Statistical Analyses Using R. 2nd ed. p. 15.

library(tidyverse)
dta2 <-HSAUR3::suicides2 
names(dta2)<- c("25-34", "35-44", "45-54", "55-64","65-74")
dta2 <-rownames_to_column(dta2, var = "country")
str(dta2)

'data.frame':   15 obs. of  6 variables:
 $ country: chr  "Canada" "Israel" "Japan" "Austria" ...
 $ 25-34  : num  22 9 22 29 16 28 48 7 8 26 ...
 $ 35-44  : num  27 19 19 40 25 35 65 8 11 29 ...
 $ 45-54  : num  31 10 21 52 36 41 84 11 18 36 ...
 $ 55-64  : num  34 14 31 53 47 49 81 18 20 32 ...
 $ 65-74  : num  24 27 49 69 56 52 107 27 28 28 ...

pacman::p_load(tidyverse, magrittr)
dta2 %<>% pivot_longer(-country,names_to = "Age", values_to = "Number")

boxplot(Number ~ Age,
        data=dta2,
        frame=F,
        horizontal=T,
        varwidth=T,
        cex.axis=.6,
        xlab="Deaths per 100,000 from male suicides",
        ylab="Age")
abline(v=seq(0,100,20), lty=3, col='gray')
abline(v=mean(dta2$Number), lty=3, col='red')
stripchart(Number ~ Age,
           data=dta2,
           add=T,
           col='darkgray', pch=1,
           cex=.8,
           method='jitter')

年齡組越高自殺率越高

inclass3

Use the dataset to replicate the plot below:

Source: Wainer, H. (2001). Uneducated Guesses. Using Evidence to Uncover Misguided Education Policies.

dta3 <- read.table("sat_gpa.txt", header = T)

par(pty="s",mgp=c(2,1,0))
par(mar=c(3,3,0,0)+0.2)
par(oma=c(3,0,3,0))

# 先布置好畫面
with(dta3, plot(SAT_No, GPA_No, 
                  type="n", #不要呈現點
                  xlim=c(1150, 1400), 
                  ylim=c(2.6,3.4),
                  xlab="SAT (V+M)",
                  ylab="First Year GPA"),
                  cex.axis=1, cex.lab=1)

# 畫線(要先畫不然線會穿過圈圈)
# 要先畫"Bowdoin"，不然"Colby"會被蓋掉
with(dta3[c(3),], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=2,  col="grey"))
with(dta3[-3,], segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, lty=1, lwd=1,  col="black"))


# 畫點
with(dta3, points(SAT_No, GPA_No, pch=21, bg="black", cex=1.5))
with(dta3, points(SAT_Yes, GPA_Yes, pch=21, bg="white", cex=1.5))

# Add text at coordinates
## dta3[-3,]不要畫"Bowdoin", cex為text size
with(dta3[-3,], text(SAT_Yes+44, GPA_Yes, labels=College, cex=.8))
# 透過font讓Bowdoin變粗(1=plain, 2=bold, 3=italic, 4=bold-italic)
with(dta3, text(1312+44, 3.12, labels="Bowdoin", font = 2, cex=.8))

#legend
legend("topleft", c("Submitted SAT Scores", "Did NOT Submit SAT Scores"),
       pch=c(21,19),
       bg=c("white","black"),
       pt.cex=1, bty="n", cex = .8)

# axis
axis(1, seq(1150,1400, length=16), labels=F)
axis(2, seq(2.6,3.4, length=21), labels=F)

# annotation
mtext("Figure 1.4 The mean SAT coupled with the mean first-year GPA for the class of \n 1999 at six schools  shown for those who submitted SAT scores for admis- \n sion and those who did not", side=1, cex=.8, adj = 0, line=2, outer = T)

在margin的設定上有一些障礙

1108inclass

Jenny Hsu

2021/11/16

inclass1

ChickWeight{datasets} example

nlschools{MASS} dataset

inclass2

inclass3