Exercise 1

Use trellis graphics to explore various ways to display the sample data from the National Longitudinal Survey of Youth.

dta1 <- read.csv("nlsy86long.csv", header =T, sep=",")
head(dta1)
##     id    sex     race time grade year month      math      read
## 1 2390 Female Majority    1     0    6    67 14.285714 19.047619
## 2 2560 Female Majority    1     0    6    66 20.238095 21.428571
## 3 3740 Female Majority    1     0    6    67 17.857143 21.428571
## 4 4020   Male Majority    1     0    5    60  7.142857  7.142857
## 5 6350   Male Majority    1     1    7    78 29.761905 30.952381
## 6 7030   Male Majority    1     0    5    62 14.285714 17.857143
str(dta1)
## 'data.frame':    664 obs. of  9 variables:
##  $ id   : int  2390 2560 3740 4020 6350 7030 7200 7610 7680 7700 ...
##  $ sex  : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 2 2 1 2 ...
##  $ race : Factor w/ 2 levels "Majority","Minority": 1 1 1 1 1 1 1 1 1 1 ...
##  $ time : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ grade: int  0 0 0 0 1 0 0 0 0 0 ...
##  $ year : int  6 6 6 5 7 5 6 7 6 6 ...
##  $ month: int  67 66 67 60 78 62 66 79 76 67 ...
##  $ math : num  14.29 20.24 17.86 7.14 29.76 ...
##  $ read : num  19.05 21.43 21.43 7.14 30.95 ...
dta1$grade <- as.factor(dta1$grade)

Stripplot

library(lattice)
stripplot(math ~ grade| sex, 
          group=race,
          data=dta1, 
          alpha=.8,
          type=c('g','p'),
          jitter.data=TRUE, 
          xlab="Race",
          ylab="Math scores",
          auto.key=list(space="top", 
                        columns=2))

Dotplot

dotplot(read ~ grade | sex, 
       data=dta1, 
       pch=1, 
       cex=.5, 
       alpha=.5,
       xlab="Grade", 
       ylab='Reading score', 
       par.settings=standard.theme(color=FALSE))

Box-and-whisker plot

bwplot(math ~ grade | sex : race,
       data=dta1,
       xlab="Grade",
       ylab = "Math score",
       par.settings=standard.theme(color=FALSE))

xyplot

xyplot(read ~ year | race, 
       groups=sex, 
       data=dta1,  
       xlab="Age", 
       ylab="Reading score",
       type=c('p', 'g', 'r'),
       between=list(x=0.5),
       auto.key = list(points=TRUE, lines=TRUE, column=2),
       par.settings=simpleTheme(col=c("black", 
                                      "gray"),
                                pch=c(1, 20),
                                col.line=c("black", 
                                           "gray")))

xyplot(read + math ~ year | race, 
       groups=sex, 
       data=dta1,  
       xlab="Age", 
       ylab="Score",
       type=c('p', 'g', 'r'),
       between=list(x=0.5),
       auto.key = list(points=TRUE, lines=TRUE, column=2),
       par.settings=simpleTheme(col=c("black", 
                                      "gray"),
                                pch=c(1, 20),
                                col.line=c("black", 
                                           "gray")))

Histogram

histogram(~ read  | sex, 
          data=dta1, 
          type='density', 
          layout=c(1, 2),
          between=list(y=0.5),
          panel=function(x,...) {
            panel.histogram(x,...)
            panel.mathdensity(dmath=dnorm, 
                              lwd=1.2, 
                              args=list(mean=mean(x, na.rm=T),
                                        sd=sd(x, na.rm=T)), ...)
          },
          par.settings=standard.theme(color=FALSE))

Density plot

densityplot(~ math, 
            groups=sex, 
            data=dta1,
            auto.key=TRUE,
            par.settings=standard.theme(color=FALSE))

Q-Q plot

qqmath(~math | sex,
       data= dta1,
       aspect="xy",
       type=c('p','g'),
       prepanel=prepanel.qqmathline,
       panel=function(x, ...) {
          panel.qqmathline(x, ...)
          panel.qqmath(x, ...)
       },
       par.settings=standard.theme(color=FALSE))

Scatter PlOt Matrix

splom(~ dta1[,c("read", "math", "year")] | sex, 
      data=dta1,
      pch='.', 
      axis.text.cex=0.3,
      par.settings=standard.theme(color=FALSE))

Exercise 2

Eight different physical measurements of 30 French girls were recorded from 4 to 15 years old. Explore various ways to display the data using trellis graphics.

dta2 <- read.table("girlsGrowth.txt", header = T)
head(dta2)
##     Wt   Ht  Hb  Hc  Cc Arm Calf Pelvis age id
## 1 1456 1025 602 486 520 157  205    170   4 S1
## 2 1426  998 572 501 520 150  215    169   4 S2
## 3 1335  961 560 494 495 145  214    158   4 S3
## 4 1607 1006 595 497 560 178  218    172   4 S4
## 5 1684 1012 584 490 553 165  220    158   4 S5
## 6 1374 1012 580 492 525 158  202    167   4 S6
str(dta2)
## 'data.frame':    360 obs. of  10 variables:
##  $ Wt    : int  1456 1426 1335 1607 1684 1374 1570 1450 1214 1456 ...
##  $ Ht    : int  1025 998 961 1006 1012 1012 1040 990 968 983 ...
##  $ Hb    : int  602 572 560 595 584 580 586 561 571 563 ...
##  $ Hc    : int  486 501 494 497 490 492 511 488 481 485 ...
##  $ Cc    : int  520 520 495 560 553 525 540 520 476 532 ...
##  $ Arm   : int  157 150 145 178 165 158 153 159 145 158 ...
##  $ Calf  : int  205 215 214 218 220 202 220 210 198 219 ...
##  $ Pelvis: int  170 169 158 172 158 167 180 158 150 154 ...
##  $ age   : int  4 4 4 4 4 4 4 4 4 4 ...
##  $ id    : Factor w/ 30 levels "S1","S10","S11",..: 1 12 23 25 26 27 28 29 30 2 ...
dta2$age <- as.factor(dta2$age)
stripplot(Arm~ Ht,
          groups= age,
          data = dta2,
          alpha=.5,
          type=c("g", "p"),
          jitter.data= TRUE,
          xlab="Height(cm)",
          auto.key=list(space="top", column = 3),
          par.settings= standard.theme(color = FALSE))

xyplot(Arm ~ Ht,
       groups = age,
       data=dta2, 
       type="smooth",
       panel=function(x, y, ...) {
        panel.xyplot(x, y, ...)
        panel.grid(h=-1, 
                   v=-1, 
                   col="gray80", 
                   lty=3, ...)
        panel.average(x, y, fun=mean, 
                      horizontal=FALSE, 
                      col='gray', ...)},
       auto.key = list(lines = TRUE, column = 4)
       )

Exercise 3

Your manager gave you a sales data on sevral products in a SAS format. Your task is to summarize and report the data in tables and graphs using the R lattice package.

#use {haven} by Hadley Wickham
library(haven)
dta3 <- read_sas("sales.sas7bdat")
head(dta3)
## # A tibble: 6 x 14
##   product category customer  year month quarter market sales expense region
##   <chr>   <chr>    <chr>    <dbl> <dbl>   <dbl>  <dbl> <dbl>   <dbl>  <dbl>
## 1 Shoes   Shoes    Acme      2001     1       1      1   300     240      1
## 2 Boots   Shoes    Acme      2001     1       1      1  2200    1540      1
## 3 Slippe… Slippers Acme      2001     1       1      1   900     540      1
## 4 Shoes   Shoes    Acme      2001     2       1      1   100      80      1
## 5 Boots   Shoes    Acme      2001     2       1      1  1400     980      1
## 6 Slippe… Slippers Acme      2001     2       1      1     0       0      1
## # … with 4 more variables: district <dbl>, return <dbl>, constantv <dbl>,
## #   quantity <dbl>
str(dta3)
## Classes 'tbl_df', 'tbl' and 'data.frame':    72 obs. of  14 variables:
##  $ product  : chr  "Shoes" "Boots" "Slippers" "Shoes" ...
##   ..- attr(*, "label")= chr "Product"
##  $ category : chr  "Shoes" "Shoes" "Slippers" "Shoes" ...
##   ..- attr(*, "label")= chr "Product Category"
##  $ customer : chr  "Acme" "Acme" "Acme" "Acme" ...
##   ..- attr(*, "label")= chr "Customer"
##  $ year     : num  2001 2001 2001 2001 2001 ...
##   ..- attr(*, "label")= chr "Year"
##  $ month    : num  1 1 1 2 2 2 3 3 3 4 ...
##   ..- attr(*, "label")= chr "Month"
##  $ quarter  : num  1 1 1 1 1 1 1 1 1 2 ...
##   ..- attr(*, "label")= chr "Quarter"
##  $ market   : num  1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "Market"
##  $ sales    : num  300 2200 900 100 1400 0 600 0 1400 2600 ...
##   ..- attr(*, "label")= chr "Sales"
##  $ expense  : num  240 1540 540 80 980 0 480 0 840 2080 ...
##   ..- attr(*, "label")= chr "Expense"
##  $ region   : num  1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "Region"
##  $ district : num  1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "District"
##  $ return   : num  0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "label")= chr "Returns"
##  $ constantv: num  1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "Constant Value"
##  $ quantity : num  30 275 180 10 175 0 60 0 280 260 ...
##   ..- attr(*, "label")= chr "Product Quantity"
##  - attr(*, "label")= chr "SALES"
## Recode the `region` variable (1 to 4) by "Nothern", "Southern", "Eastern" and "Western"
dta3$region <- as.factor(dta3$region)
levels(dta3$region)[1:4] <- c("Nothern", "Southern", "Eastern", "Western")


## Recode  `district` variable (1 - 5) by "North East", "South East", "South West", "North West", "Central West"
dta3$district <- as.factor(dta3$district)
levels(dta3$district)[1:5] <- c("North East", "South East", "South West", "North West", "Central West")

## Recode `quarter` variable (1-4) by "1st", "2nd", "3rd", "4th"
dta3$quarter <- as.factor(dta3$quarter)
levels(dta3$quarter)[1:4] <- c("1st", "2nd", "3rd", "4th")

## Recode `month` variable (1-12) by "Jan", "Feb", etc. Set negative sales values to zero.
dta3$month <- as.factor(dta3$month)
levels(dta3$month)[1:12] <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

#set negative sales value to zero
library(lattice)
dta3$year <- as.factor(dta3$year)
stripplot(sales ~ month, groups= year,
          data=dta3, 
       layout=c(1,2),
       xlab="Month", type=c('p', 'g',"r"),
       auto.key=list(space="top", lines= T, column = 2))

2001年及2002年,銷售額都隨著月份增加

histogram(~ sales  | region, 
          data=dta3, 
          type='density', 
          
          between=list(y=0.5),
          panel=function(x,...) {
            panel.histogram(x,...)
            panel.mathdensity(dmath=dnorm, 
                              lwd=1.2, 
                              args=list(mean=mean(x, na.rm=T),
                                        sd=sd(x, na.rm=T)), ...)
          },
          par.settings=standard.theme(color=FALSE))

Eastern區的銷售額都偏高單價