Exercise 0413

Exercise 1

This R script illustrates how to split the plot region to include histograms on the margins of a scatter diagram using the Galton{HistData} data set. Compile it as an HTML document with comments on each code chunk.

The script will draw 3 plots and arrange the plots as indicated in the following picture.

# Scatter plot of child height against parent height with a histogram of each
# variable drawn in the matching margin.

# Galton's parent/child height data from the HistData package.
dta <- HistData::Galton

# Save the current graphical parameters. layout(), par(mar = ) and
# par(oma = ) below change device-wide state; without restoring it, every
# later plot on the same device would inherit the split layout and margins.
old_par <- par(no.readonly = TRUE)

# Split the device into a 2 x 2 grid. The matrix is filled by row, so
# plot 2 goes top-left, plot 1 bottom-left, plot 3 bottom-right, and the
# top-right cell (0) is left empty. The left column takes 4/5 of the width
# and the bottom row takes 4/5 of the height.
zones <- matrix(c(2, 0, 1, 3), ncol = 2, byrow = TRUE)
layout(zones, widths = c(4 / 5, 1 / 5), heights = c(1 / 5, 4 / 5))

# Compute the two histograms without drawing them; only the bin counts are
# needed for the marginal bar plots.
xh <- with(dta, hist(parent, plot = FALSE))
yh <- with(dta, hist(child, plot = FALSE))

# Largest bin count across both histograms, used as a common count-axis limit
# so the two marginal plots share the same scale.
ub <- max(c(xh$counts, yh$counts))

# Plot 1 (bottom-left): sunflower scatter plot of parent vs. child height.
par(mar = c(3, 3, 1, 1))
with(dta, sunflowerplot(parent, child))

# Plot 2 (top-left): bin counts of parent height above the scatter plot.
# No bottom margin, no gaps between bars, no axes.
par(mar = c(0, 3, 1, 1))
barplot(xh$counts, axes = FALSE, ylim = c(0, ub), space = 0)

# Plot 3 (bottom-right): bin counts of child height to the right of the
# scatter plot, drawn horizontally. No left margin, no gaps, no axes.
par(mar = c(3, 0, 1, 1))
barplot(yh$counts, axes = FALSE, xlim = c(0, ub), space = 0, horiz = TRUE)

# Write the axis titles into the outer margins of the whole figure.
# side = 1 is the bottom, side = 2 is the left; `at` positions each string
# along its margin and adj = 0 starts the text at that position.
par(oma = c(3, 3, 0, 0))
mtext("Average height of parents (in inch)",
      side = 1, line = 2, outer = TRUE, adj = 0,
      at = .4 * (mean(dta$parent) - min(dta$parent)) /
        (diff(range(dta$parent))))
mtext("Height of child (in inch)",
      side = 2, line = 2, outer = TRUE, adj = 0,
      at = .4 * (mean(dta$child) - min(dta$child)) /
        (diff(range(dta$child))))

# Restore the graphical parameters saved above (this also undoes layout()).
par(old_par)

Exercise 2

Deaths per 100,000 from male suicides for 5 age groups and 15 countries are given in the table below. The data set is available as suicides2{HSAUR3}. Construct side-by-side box plots for the data from different age groups and comment briefly.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the suicides2 data set from HSAUR3 into the workspace and render its
# first six rows as a formatted table.
data("suicides2", package = "HSAUR3")
knitr::kable(head(suicides2, n = 6L))

	A25.34	A35.44	A45.54	A55.64	A65.74
Canada	22	27	31	34	24
Israel	9	19	10	14	27
Japan	22	19	21	31	49
Austria	29	40	52	53	69
France	16	25	36	47	56
Germany	28	35	41	49	52

# Side-by-side box plots of male suicide rates, one box per age group.
# Each column of suicides2 is one age group, so passing the whole table to
# boxplot() draws one box per column.
dta <- as_tibble(HSAUR3::suicides2)

# Use TRUE/FALSE rather than T/F: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
boxplot(dta,
        horizontal = FALSE,
        names = c("25-34", "35-44", "45-54", "55-64", "65-74"),
        col = "bisque",
        varwidth = TRUE,
        xlab = "Age",
        ylab = "Male Suicides")

Exercise 3

# Weight-over-time scatter plots for the ChickWeight data, one panel per diet.
dta <- ChickWeight

# One data frame per level of Diet.
dta_diet <- split(dta, dta$Diet)

# 2 x 2 grid of panels; no top or right margin around each panel.
par(mfrow = c(2, 2), mar = c(4, 4, 0, 0))

# Draw one panel per diet. A for loop is used instead of lapply() because the
# panels are drawn purely for their side effect: lapply() at top level would
# auto-print the list returned by legend() for every panel.
for (x in dta_diet) {
  plot(weight ~ Time,
       data = x,
       xlab = "Time (day)",
       ylab = "Weight (gm)")
  # Label the panel with its diet, e.g. "Diet=1"; bty = "n" draws no box.
  legend("topleft",
         paste("Diet", x$Diet[1], sep = "="),
         bty = "n")
}

## $`1`
## $`1`$rect
## $`1`$rect$w
## [1] 4.948786
## 
## $`1`$rect$h
## [1] 52.72942
## 
## $`1`$rect$left
## [1] -0.84
## 
## $`1`$rect$top
## [1] 315.8
## 
## 
## $`1`$text
## $`1`$text$x
## [1] 0.8528419
## 
## $`1`$text$y
## [1] 289.4353
## 
## 
## 
## $`2`
## $`2`$rect
## $`2`$rect$w
## [1] 4.948786
## 
## $`2`$rect$h
## [1] 57.02589
## 
## $`2`$rect$left
## [1] -0.84
## 
## $`2`$rect$top
## [1] 342.68
## 
## 
## $`2`$text
## $`2`$text$x
## [1] 0.8528419
## 
## $`2`$text$y
## [1] 314.1671
## 
## 
## 
## $`3`
## $`3`$rect
## $`3`$rect$w
## [1] 4.948786
## 
## $`3`$rect$h
## [1] 65.22824
## 
## $`3`$rect$left
## [1] -0.84
## 
## $`3`$rect$top
## [1] 386.36
## 
## 
## $`3`$text
## $`3`$text$x
## [1] 0.8528419
## 
## $`3`$text$y
## [1] 353.7459
## 
## 
## 
## $`4`
## $`4`$rect
## $`4`$rect$w
## [1] 4.948786
## 
## $`4`$rect$h
## [1] 55.26824
## 
## $`4`$rect$left
## [1] -0.84
## 
## $`4`$rect$top
## [1] 333.32
## 
## 
## $`4`$text
## $`4`$text$x
## [1] 0.8528419
## 
## $`4`$text$y
## [1] 305.6859

library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

library(dplyr)
# Take the nlschools data set from MASS and show a compact summary of its
# columns and their types.
dta <- MASS::nlschools
utils::str(dta)

## 'data.frame':    2287 obs. of  6 variables:
##  $ lang : int  46 45 33 46 20 30 30 57 36 36 ...
##  $ IQ   : num  15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
##  $ class: Factor w/ 133 levels "180","280","1082",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ GS   : int  29 29 29 29 29 29 29 29 29 29 ...
##  $ SES  : int  23 10 15 23 10 10 23 10 13 15 ...
##  $ COMB : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...

# Preview the first rows of the data.
head(dta)

# Keep only the rows whose GS value is greater than 30, then display them.
dta1 <- dplyr::filter(dta, GS > 30)
dta1