Render the R script for replicating figures in Chapter 4 of Lattice: Multivariate Data Visualization with R (Sarkar, D. 2008) to html document with comments at each code chunk indicated by ‘##’.
##read the data
VADeaths
## Rural Male Rural Female Urban Male Urban Female
## 50-54 11.7 8.7 15.4 8.4
## 55-59 18.1 11.7 24.3 13.6
## 60-64 26.9 20.3 37.0 19.3
## 65-69 41.0 30.9 54.6 35.1
## 70-74 66.0 54.3 71.1 50.0
##identify data type
class(VADeaths)
## [1] "matrix"
##View dotplot content
library(lattice)
methods("dotplot")
## [1] dotplot.array* dotplot.default* dotplot.formula* dotplot.matrix*
## [5] dotplot.numeric* dotplot.table*
## see '?methods' for accessing help and source code
##plot with raw data
dotplot(VADeaths, groups=FALSE)
dotplot(VADeaths, groups=FALSE,
layout=c(1, 4), ##four columns and one row
aspect=0.7, ##graphic width
origin=0,
type=c("p", "h"), ##dot and line
main="Death Rates in Virginia - 1940",
xlab="Rate (per 1000)")
##legend on right side
dotplot(VADeaths, type="o",
auto.key=list(lines=TRUE, space="right"),
main="Death Rates in Virginia - 1940",
xlab="Rate (per 1000)")
##bar chart
barchart(VADeaths, groups=FALSE,
layout=c(1, 4), ##four columns and one row
aspect=0.7, ##graphic width
reference=FALSE,
main="Death Rates in Virginia - 1940",
xlab="Rate (per 100)")
##read the postdoc
data(postdoc, package="latticeExtra")
##calculate the percentage of each group
barchart(prop.table(postdoc, margin=1),
xlab="Proportion",
auto.key=list(adj=1)) ##list group name
##calculate the percentage of each group (dot chart)
dotplot(prop.table(postdoc, margin=1),groups=FALSE,
xlab="Proportion",
par.strip.text=list(abbreviate=TRUE, minlength=10))##abbreviated text
## stack for long dotplot
dotplot(prop.table(postdoc, margin=1), groups=FALSE,
index.cond=function(x, y) median(x),
xlab="Proportion",
layout=c(1, 5),##five columns and one row
aspect=0.6,##graphic width
scales=list(y=list(relation="free", rot=0)),
prepanel=function(x, y) {
list(ylim=levels(reorder(y, x)))
},##row the dot chart
panel=function(x, y, ...) {
panel.dotplot(x, reorder(y, x), ...)
})
##read data Chem97{mlmRev}
data(Chem97, package="mlmRev")
##使用xtabs函數將多個項度製成表格
gcsescore.tab <- xtabs(~ gcsescore + gender, Chem97)
##as.data.frame儲存成矩陣資料格式
gcsescore.df <- as.data.frame(gcsescore.tab)
##as.numeric將gcsescore儲存成數字形式,as.character轉換成文字的形式
gcsescore.df$gcsescore <- as.numeric(as.character(gcsescore.df$gcsescore))
##使用xyplot語法畫lattice散佈圖,指定x 與y 軸的資料,y值=Freq,X值gcsescore,Z值=gender
xyplot(Freq ~ gcsescore | gender,
data = gcsescore.df,
type="h",
layout=c(1, 2),
xlab="Average GCSE Score")
#使用xtabs函數將多個項度製成表格
score.tab <- xtabs(~score + gender, Chem97)
##as.data.frame儲存成矩陣資料格式
score.df <- as.data.frame(score.tab)
##製作barchart()長條圖
barchart(Freq ~ score | gender, score.df, origin=0)
Create a new student-teacher ratio variable from the enrltot and teachers variables in the data set Caschool{Ecdat} to generate the following plot in which reading scores (readscr) for grade span assignment grspan equals “KK-08” in the data set are split into three levels: lower-third, middle-third, and upper-third:
#讀取資料
head(Ecdat::Caschool)
## distcod county district grspan enrltot teachers
## 1 75119 Alameda Sunol Glen Unified KK-08 195 10.90
## 2 61499 Butte Manzanita Elementary KK-08 240 11.15
## 3 61549 Butte Thermalito Union Elementary KK-08 1550 82.90
## 4 61457 Butte Golden Feather Union Elementary KK-08 243 14.00
## 5 61523 Butte Palermo Union Elementary KK-08 1335 71.50
## 6 62042 Fresno Burrel Union Elementary KK-08 137 6.40
## calwpct mealpct computer testscr compstu expnstu str avginc
## 1 0.5102 2.0408 67 690.80 0.3435898 6384.911 17.88991 22.690001
## 2 15.4167 47.9167 101 661.20 0.4208333 5099.381 21.52466 9.824000
## 3 55.0323 76.3226 169 643.60 0.1090323 5501.955 18.69723 8.978000
## 4 36.4754 77.0492 85 647.70 0.3497942 7101.831 17.35714 8.978000
## 5 33.1086 78.4270 171 640.85 0.1280899 5235.988 18.67133 9.080333
## 6 12.3188 86.9565 25 605.55 0.1824818 5580.147 21.40625 10.415000
## elpct readscr mathscr
## 1 0.000000 691.6 690.0
## 2 4.583333 660.5 661.9
## 3 30.000002 636.3 650.9
## 4 0.000000 651.9 643.5
## 5 13.857677 641.8 639.9
## 6 12.408759 605.7 605.4
dta2 <- Ecdat::Caschool
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lattice)
#製作三等份的cutpoint
cutpoint1<- min(dta2$readscr)+((max(dta2$readscr)-min(dta2$readscr))/3)
cutpoint2<- max(dta2$readscr)-((max(dta2$readscr)-min(dta2$readscr))/3)
#產生新的STration 及readscr的分級結果變項
dta2 %>%
mutate(STratio= enrltot / teachers,
rlevels= case_when(readscr < cutpoint1 ~"lower-third",
readscr >=cutpoint1 & readscr < cutpoint2 ~"middle-third",
readscr >= cutpoint2 ~"upper-third")) %>%
group_by(grspan) %>% #分組
xyplot(readscr~STratio | rlevels, type=c("g", "r", "p"), data=.,
xlab = "Student-Teacher Ratio", ylab = "Reading score")
The data set concerns student evaluation of instructor’s beauty and teaching quality for several courses at the University of Texas. The teaching evaluatons were done at the end of the semester, and the beauty judgments were made later, by six students who had not attended the classes and were not aware of the course evaluations.
dta3 <- read.table("beautyCourseEval.txt", header = T)
head(dta3)
## eval beauty sex age minority tenure courseID
## 1 4.3 0.2015666 1 36 1 0 3
## 2 4.5 -0.8260813 0 59 0 1 0
## 3 3.7 -0.6603327 0 51 0 1 4
## 4 4.3 -0.7663125 1 40 0 1 2
## 5 4.4 1.4214450 1 31 0 0 0
## 6 4.2 0.5002196 0 62 0 1 0
dta3$courseID<-as.character(dta3$courseID)
##??
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
m<-lmList(eval~beauty| courseID, data = dta3)
m1<-data.frame(courseID=rownames(coef(m)),coef(m),check.names=FALSE)
m2<-m1[with(m1,order(beauty, decreasing=F)),]
m.order<-list(m2$courseID)
xyplot(eval~beauty|courseID, data=dta3, type=c("p", "g", "r"), layout=c(6,6), index.cond=m.order, xlab="Beauty judgment score", ylab="Average course evaluation score")
A sample of 40 psychology students at a large southwestern university took four subtests (Vocabulary, Similarities, Block Design, and Picture Completion) of the Wechsler (1981) Adult Intelligence Scale-Revised. The researchers also used Magnetic Resonance Imaging (MRI) to determine the brain size of the subjects.
dta4 <- read.table("brainsize.txt", header = T)
head(dta4)
## Sbj Gender FSIQ VIQ PIQ Weight Height MRICount
## 1 1 Female 133 132 124 118 64.5 816932
## 2 2 Male 140 150 124 NA 72.5 1001121
## 3 3 Male 139 123 150 143 73.3 1038437
## 4 4 Male 133 129 128 172 68.8 965353
## 5 5 Female 137 132 134 147 65.0 951545
## 6 6 Female 99 90 110 146 69.0 928799
str(dta4)
## 'data.frame': 40 obs. of 8 variables:
## $ Sbj : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 1 1 1 2 2 ...
## $ FSIQ : int 133 140 139 133 137 99 138 92 89 133 ...
## $ VIQ : int 132 150 123 129 132 90 136 90 93 114 ...
## $ PIQ : int 124 124 150 128 134 110 131 98 84 147 ...
## $ Weight : int 118 NA 143 172 147 146 138 175 134 172 ...
## $ Height : num 64.5 72.5 73.3 68.8 65 69 64.5 66 66.3 68.8 ...
## $ MRICount: int 816932 1001121 1038437 965353 951545 928799 991305 854258 904858 955466 ...
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
dta41<-melt(dta4, id=c("Sbj", "Gender", "Weight", "Height", "MRICount"))
dta41 %>% bwplot(value~Gender | variable,
data=.,
par.settings=standard.theme(color=FALSE),
xlab="Gender", ylab="IQ score")
lapply(dta4[3:5], function(x)
t.test(x ~ dta4$Gender, na.action = na.pass))
## $FSIQ
##
## Welch Two Sample t-test
##
## data: x by dta4$Gender
## t = -0.40267, df = 37.892, p-value = 0.6895
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -18.68639 12.48639
## sample estimates:
## mean in group Female mean in group Male
## 111.9 115.0
##
##
## $VIQ
##
## Welch Two Sample t-test
##
## data: x by dta4$Gender
## t = -0.77262, df = 36.973, p-value = 0.4447
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -21.010922 9.410922
## sample estimates:
## mean in group Female mean in group Male
## 109.45 115.25
##
##
## $PIQ
##
## Welch Two Sample t-test
##
## data: x by dta4$Gender
## t = -0.1598, df = 37.815, p-value = 0.8739
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -15.72079 13.42079
## sample estimates:
## mean in group Female mean in group Male
## 110.45 111.60
-There are NO gender differences in the three IQ scores.
xyplot(Weight ~ Height, groups=Gender,
data=dta4, type=c('p', 'g', 'r'),
auto.key=list(column=2),
xlab="Height", ylab="Weight")
lapply(dta4[6:7], function(x)
summary(lm(x~Gender, data=dta4, na.action=na.omit)))
## $Weight
##
## Call:
## lm(formula = x ~ Gender, data = dta4, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.444 -15.383 3.678 13.306 37.800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.200 4.132 33.203 < 2e-16 ***
## GenderMale 29.244 6.004 4.871 2.23e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.48 on 36 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.3972, Adjusted R-squared: 0.3805
## F-statistic: 23.73 on 1 and 36 DF, p-value: 2.227e-05
##
##
## $Height
##
## Call:
## lm(formula = x ~ Gender, data = dta4, na.action = na.omit)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.132 -2.432 0.235 2.152 5.568
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.7650 0.6298 104.42 < 2e-16 ***
## GenderMale 5.6666 0.9023 6.28 2.62e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.816 on 37 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.516, Adjusted R-squared: 0.5029
## F-statistic: 39.44 on 1 and 37 DF, p-value: 2.624e-07
xyplot(MRICount~value|variable,
groups=Gender,
data=dta41,
auto.key=list(column=2),
xlab="IQ", ylab="MRICount", type=c('p', 'g', 'r'), layout=c(3,1))
lapply(dta4[3:5], function(x)
summary(lm(MRICount+x~Gender, data=dta4)))
## $FSIQ
##
## Call:
## lm(formula = MRICount + x ~ Gender, data = dta4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74893 -34603 -7286 19996 128676
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 862766 12502 69.008 < 2e-16 ***
## GenderMale 92204 17681 5.215 6.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 55910 on 38 degrees of freedom
## Multiple R-squared: 0.4171, Adjusted R-squared: 0.4018
## F-statistic: 27.19 on 1 and 38 DF, p-value: 6.774e-06
##
##
## $VIQ
##
## Call:
## lm(formula = MRICount + x ~ Gender, data = dta4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74888 -34605 -7294 20001 128677
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 862764 12502 69.010 < 2e-16 ***
## GenderMale 92207 17680 5.215 6.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 55910 on 38 degrees of freedom
## Multiple R-squared: 0.4172, Adjusted R-squared: 0.4018
## F-statistic: 27.2 on 1 and 38 DF, p-value: 6.767e-06
##
##
## $PIQ
##
## Call:
## lm(formula = MRICount + x ~ Gender, data = dta4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74894 -34596 -7278 19994 128671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 862765 12503 69.007 < 2e-16 ***
## GenderMale 92202 17681 5.215 6.78e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 55910 on 38 degrees of freedom
## Multiple R-squared: 0.4171, Adjusted R-squared: 0.4018
## F-statistic: 27.19 on 1 and 38 DF, p-value: 6.778e-06
yes, R-squared =0.4171, p<0.001