Use trellis graphics to explore various ways to display the sample data from the National Longitudinal Survey of Youth.
dta1 <- read.csv("nlsy86long.csv", header =T, sep=",")
head(dta1)
## id sex race time grade year month math read
## 1 2390 Female Majority 1 0 6 67 14.285714 19.047619
## 2 2560 Female Majority 1 0 6 66 20.238095 21.428571
## 3 3740 Female Majority 1 0 6 67 17.857143 21.428571
## 4 4020 Male Majority 1 0 5 60 7.142857 7.142857
## 5 6350 Male Majority 1 1 7 78 29.761905 30.952381
## 6 7030 Male Majority 1 0 5 62 14.285714 17.857143
str(dta1)
## 'data.frame': 664 obs. of 9 variables:
## $ id : int 2390 2560 3740 4020 6350 7030 7200 7610 7680 7700 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 2 2 2 1 2 ...
## $ race : Factor w/ 2 levels "Majority","Minority": 1 1 1 1 1 1 1 1 1 1 ...
## $ time : int 1 1 1 1 1 1 1 1 1 1 ...
## $ grade: int 0 0 0 0 1 0 0 0 0 0 ...
## $ year : int 6 6 6 5 7 5 6 7 6 6 ...
## $ month: int 67 66 67 60 78 62 66 79 76 67 ...
## $ math : num 14.29 20.24 17.86 7.14 29.76 ...
## $ read : num 19.05 21.43 21.43 7.14 30.95 ...
dta1$grade <- as.factor(dta1$grade)
library(lattice)
stripplot(math ~ grade| sex,
group=race,
data=dta1,
alpha=.8,
type=c('g','p'),
jitter.data=TRUE,
xlab="Race",
ylab="Math scores",
auto.key=list(space="top",
columns=2))
dotplot(read ~ grade | sex,
data=dta1,
pch=1,
cex=.5,
alpha=.5,
xlab="Grade",
ylab='Reading score',
par.settings=standard.theme(color=FALSE))
bwplot(math ~ grade | sex : race,
data=dta1,
xlab="Grade",
ylab = "Math score",
par.settings=standard.theme(color=FALSE))
xyplot(read ~ year | race,
groups=sex,
data=dta1,
xlab="Age",
ylab="Reading score",
type=c('p', 'g', 'r'),
between=list(x=0.5),
auto.key = list(points=TRUE, lines=TRUE, column=2),
par.settings=simpleTheme(col=c("black",
"gray"),
pch=c(1, 20),
col.line=c("black",
"gray")))
xyplot(read + math ~ year | race,
groups=sex,
data=dta1,
xlab="Age",
ylab="Score",
type=c('p', 'g', 'r'),
between=list(x=0.5),
auto.key = list(points=TRUE, lines=TRUE, column=2),
par.settings=simpleTheme(col=c("black",
"gray"),
pch=c(1, 20),
col.line=c("black",
"gray")))
histogram(~ read | sex,
data=dta1,
type='density',
layout=c(1, 2),
between=list(y=0.5),
panel=function(x,...) {
panel.histogram(x,...)
panel.mathdensity(dmath=dnorm,
lwd=1.2,
args=list(mean=mean(x, na.rm=T),
sd=sd(x, na.rm=T)), ...)
},
par.settings=standard.theme(color=FALSE))
densityplot(~ math,
groups=sex,
data=dta1,
auto.key=TRUE,
par.settings=standard.theme(color=FALSE))
qqmath(~math | sex,
data= dta1,
aspect="xy",
type=c('p','g'),
prepanel=prepanel.qqmathline,
panel=function(x, ...) {
panel.qqmathline(x, ...)
panel.qqmath(x, ...)
},
par.settings=standard.theme(color=FALSE))
splom(~ dta1[,c("read", "math", "year")] | sex,
data=dta1,
pch='.',
axis.text.cex=0.3,
par.settings=standard.theme(color=FALSE))
Eight different physical measurements of 30 French girls were recorded from 4 to 15 years old. Explore various ways to display the data using trellis graphics.
dta2 <- read.table("girlsGrowth.txt", header = T)
head(dta2)
## Wt Ht Hb Hc Cc Arm Calf Pelvis age id
## 1 1456 1025 602 486 520 157 205 170 4 S1
## 2 1426 998 572 501 520 150 215 169 4 S2
## 3 1335 961 560 494 495 145 214 158 4 S3
## 4 1607 1006 595 497 560 178 218 172 4 S4
## 5 1684 1012 584 490 553 165 220 158 4 S5
## 6 1374 1012 580 492 525 158 202 167 4 S6
str(dta2)
## 'data.frame': 360 obs. of 10 variables:
## $ Wt : int 1456 1426 1335 1607 1684 1374 1570 1450 1214 1456 ...
## $ Ht : int 1025 998 961 1006 1012 1012 1040 990 968 983 ...
## $ Hb : int 602 572 560 595 584 580 586 561 571 563 ...
## $ Hc : int 486 501 494 497 490 492 511 488 481 485 ...
## $ Cc : int 520 520 495 560 553 525 540 520 476 532 ...
## $ Arm : int 157 150 145 178 165 158 153 159 145 158 ...
## $ Calf : int 205 215 214 218 220 202 220 210 198 219 ...
## $ Pelvis: int 170 169 158 172 158 167 180 158 150 154 ...
## $ age : int 4 4 4 4 4 4 4 4 4 4 ...
## $ id : Factor w/ 30 levels "S1","S10","S11",..: 1 12 23 25 26 27 28 29 30 2 ...
dta2$age <- as.factor(dta2$age)
stripplot(Arm~ Ht,
groups= age,
data = dta2,
alpha=.5,
type=c("g", "p"),
jitter.data= TRUE,
xlab="Height(cm)",
auto.key=list(space="top", column = 3),
par.settings= standard.theme(color = FALSE))
xyplot(Arm ~ Ht,
groups = age,
data=dta2,
type="smooth",
panel=function(x, y, ...) {
panel.xyplot(x, y, ...)
panel.grid(h=-1,
v=-1,
col="gray80",
lty=3, ...)
panel.average(x, y, fun=mean,
horizontal=FALSE,
col='gray', ...)},
auto.key = list(lines = TRUE, column = 4)
)
Your manager gave you a sales data on sevral products in a SAS format. Your task is to summarize and report the data in tables and graphs using the R lattice package.
#use {haven} by Hadley Wickham
library(haven)
dta3 <- read_sas("sales.sas7bdat")
head(dta3)
## # A tibble: 6 x 14
## product category customer year month quarter market sales expense region
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Shoes Shoes Acme 2001 1 1 1 300 240 1
## 2 Boots Shoes Acme 2001 1 1 1 2200 1540 1
## 3 Slippe… Slippers Acme 2001 1 1 1 900 540 1
## 4 Shoes Shoes Acme 2001 2 1 1 100 80 1
## 5 Boots Shoes Acme 2001 2 1 1 1400 980 1
## 6 Slippe… Slippers Acme 2001 2 1 1 0 0 1
## # … with 4 more variables: district <dbl>, return <dbl>, constantv <dbl>,
## # quantity <dbl>
str(dta3)
## Classes 'tbl_df', 'tbl' and 'data.frame': 72 obs. of 14 variables:
## $ product : chr "Shoes" "Boots" "Slippers" "Shoes" ...
## ..- attr(*, "label")= chr "Product"
## $ category : chr "Shoes" "Shoes" "Slippers" "Shoes" ...
## ..- attr(*, "label")= chr "Product Category"
## $ customer : chr "Acme" "Acme" "Acme" "Acme" ...
## ..- attr(*, "label")= chr "Customer"
## $ year : num 2001 2001 2001 2001 2001 ...
## ..- attr(*, "label")= chr "Year"
## $ month : num 1 1 1 2 2 2 3 3 3 4 ...
## ..- attr(*, "label")= chr "Month"
## $ quarter : num 1 1 1 1 1 1 1 1 1 2 ...
## ..- attr(*, "label")= chr "Quarter"
## $ market : num 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Market"
## $ sales : num 300 2200 900 100 1400 0 600 0 1400 2600 ...
## ..- attr(*, "label")= chr "Sales"
## $ expense : num 240 1540 540 80 980 0 480 0 840 2080 ...
## ..- attr(*, "label")= chr "Expense"
## $ region : num 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Region"
## $ district : num 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "District"
## $ return : num 0 0 0 0 0 0 0 0 0 0 ...
## ..- attr(*, "label")= chr "Returns"
## $ constantv: num 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "label")= chr "Constant Value"
## $ quantity : num 30 275 180 10 175 0 60 0 280 260 ...
## ..- attr(*, "label")= chr "Product Quantity"
## - attr(*, "label")= chr "SALES"
## Recode the `region` variable (1 to 4) by "Nothern", "Southern", "Eastern" and "Western"
dta3$region <- as.factor(dta3$region)
levels(dta3$region)[1:4] <- c("Nothern", "Southern", "Eastern", "Western")
## Recode `district` variable (1 - 5) by "North East", "South East", "South West", "North West", "Central West"
dta3$district <- as.factor(dta3$district)
levels(dta3$district)[1:5] <- c("North East", "South East", "South West", "North West", "Central West")
## Recode `quarter` variable (1-4) by "1st", "2nd", "3rd", "4th"
dta3$quarter <- as.factor(dta3$quarter)
levels(dta3$quarter)[1:4] <- c("1st", "2nd", "3rd", "4th")
## Recode `month` variable (1-12) by "Jan", "Feb", etc. Set negative sales values to zero.
dta3$month <- as.factor(dta3$month)
levels(dta3$month)[1:12] <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
#set negative sales value to zero
library(lattice)
dta3$year <- as.factor(dta3$year)
stripplot(sales ~ month, groups= year,
data=dta3,
layout=c(1,2),
xlab="Month", type=c('p', 'g',"r"),
auto.key=list(space="top", lines= T, column = 2))
2001年及2002年,銷售額都隨著月份增加
histogram(~ sales | region,
data=dta3,
type='density',
between=list(y=0.5),
panel=function(x,...) {
panel.histogram(x,...)
panel.mathdensity(dmath=dnorm,
lwd=1.2,
args=list(mean=mean(x, na.rm=T),
sd=sd(x, na.rm=T)), ...)
},
par.settings=standard.theme(color=FALSE))
Eastern區的銷售額都偏高單價