The R script illustrates how to implement ‘small multiples’ in base graphics given the 4 different diets of the ChickWeight{datasets} example.
#載入資料
dta <- ChickWeight
#578個觀察值,4個variables
str(dta)Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 578 obs. of 4 variables:
$ weight: num 42 51 59 64 76 93 106 125 149 171 ...
$ Time : num 0 2 4 6 8 10 12 14 16 18 ...
$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
$ Diet : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
- attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
.. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
- attr(*, "outer")=Class 'formula' language ~Diet
.. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
- attr(*, "labels")=List of 2
..$ x: chr "Time"
..$ y: chr "Body weight"
- attr(*, "units")=List of 2
..$ x: chr "(days)"
..$ y: chr "(gm)"
#split資料分割,以dta$Diet(4個variables)切割成4個list
dta_diet <- split(dta, dta$Diet)
str(dta_diet)List of 4
$ 1:Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 220 obs. of 4 variables:
..$ weight: num [1:220] 42 51 59 64 76 93 106 125 149 171 ...
..$ Time : num [1:220] 0 2 4 6 8 10 12 14 16 18 ...
..$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
..$ Diet : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
.. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
..- attr(*, "outer")=Class 'formula' language ~Diet
.. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
..- attr(*, "labels")=List of 2
.. ..$ x: chr "Time"
.. ..$ y: chr "Body weight"
..- attr(*, "units")=List of 2
.. ..$ x: chr "(days)"
.. ..$ y: chr "(gm)"
$ 2:Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 120 obs. of 4 variables:
..$ weight: num [1:120] 40 50 62 86 125 163 217 240 275 307 ...
..$ Time : num [1:120] 0 2 4 6 8 10 12 14 16 18 ...
..$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 30 30 30 30 30 30 30 30 30 30 ...
..$ Diet : Factor w/ 4 levels "1","2","3","4": 2 2 2 2 2 2 2 2 2 2 ...
..- attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
.. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
..- attr(*, "outer")=Class 'formula' language ~Diet
.. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
..- attr(*, "labels")=List of 2
.. ..$ x: chr "Time"
.. ..$ y: chr "Body weight"
..- attr(*, "units")=List of 2
.. ..$ x: chr "(days)"
.. ..$ y: chr "(gm)"
$ 3:Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 120 obs. of 4 variables:
..$ weight: num [1:120] 42 53 62 73 85 102 123 138 170 204 ...
..$ Time : num [1:120] 0 2 4 6 8 10 12 14 16 18 ...
..$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 34 34 34 34 34 34 34 34 34 34 ...
..$ Diet : Factor w/ 4 levels "1","2","3","4": 3 3 3 3 3 3 3 3 3 3 ...
..- attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
.. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
..- attr(*, "outer")=Class 'formula' language ~Diet
.. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
..- attr(*, "labels")=List of 2
.. ..$ x: chr "Time"
.. ..$ y: chr "Body weight"
..- attr(*, "units")=List of 2
.. ..$ x: chr "(days)"
.. ..$ y: chr "(gm)"
$ 4:Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 118 obs. of 4 variables:
..$ weight: num [1:118] 42 51 66 85 103 124 155 153 175 184 ...
..$ Time : num [1:118] 0 2 4 6 8 10 12 14 16 18 ...
..$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 44 44 44 44 44 44 44 44 44 44 ...
..$ Diet : Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 4 4 4 4 4 ...
..- attr(*, "formula")=Class 'formula' language weight ~ Time | Chick
.. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
..- attr(*, "outer")=Class 'formula' language ~Diet
.. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
..- attr(*, "labels")=List of 2
.. ..$ x: chr "Time"
.. ..$ y: chr "Body weight"
..- attr(*, "units")=List of 2
.. ..$ x: chr "(days)"
.. ..$ y: chr "(gm)"
#mfcol, mfrow 設置小圖數量與位置,取值為數值型向量c(nr,nc)形式
#表示把圖分為nr行nc列個小圖,圖形順序按列排(mfcol)或按行排(mfrow)
#The default of mfrow or mfcol is c(1,1)
#mar 設置圖形空白邊界行數
#mar=c(底,左,上,右) 圖的四個邊上指定的邊距線數
#The default of mar is c(5.1, 4.1, 4.1, 2.1) 打par("mar")可出現
par(mfrow=c(2, 2), mar=c(4,4,0,0))
#畫dta_diet圖
lapply(dta_diet,
function(x) {
plot(x$weight ~ x$Time,
xlab="Time (day)",
ylab="Weight (gm)")
#legend加上圖的描述
#bty圖例框是否畫出,y為畫出,default為n不畫出
legend('topleft',
paste("Diet", x$Diet[1], sep="="),
bty='n')}
)$`1`
$`1`$rect
$`1`$rect$w
[1] 7.067274
$`1`$rect$h
[1] 72.46349
$`1`$rect$left
[1] -0.84
$`1`$rect$top
[1] 315.8
$`1`$text
$`1`$text$x
[1] 1.577517
$`1`$text$y
[1] 279.5683
$`2`
$`2`$rect
$`2`$rect$w
[1] 7.067274
$`2`$rect$h
[1] 78.36792
$`2`$rect$left
[1] -0.84
$`2`$rect$top
[1] 342.68
$`2`$text
$`2`$text$x
[1] 1.577517
$`2`$text$y
[1] 303.496
$`3`
$`3`$rect
$`3`$rect$w
[1] 7.067274
$`3`$rect$h
[1] 89.64002
$`3`$rect$left
[1] -0.84
$`3`$rect$top
[1] 386.36
$`3`$text
$`3`$text$x
[1] 1.577517
$`3`$text$y
[1] 341.54
$`4`
$`4`$rect
$`4`$rect$w
[1] 7.067274
$`4`$rect$h
[1] 75.95247
$`4`$rect$left
[1] -0.84
$`4`$rect$top
[1] 333.32
$`4`$text
$`4`$text$x
[1] 1.577517
$`4`$text$y
[1] 295.3438
Adapt the script to produce a plot of 5 panels in which each panel shows a histogram of IQ for each of 5 classes with over 30 pupils in the nlschools{MASS} dataset.
#載入資料
library(MASS)
dta <- nlschools
#2287個觀察值,6個variables
str(dta)'data.frame': 2287 obs. of 6 variables:
$ lang : int 46 45 33 46 20 30 30 57 36 36 ...
$ IQ : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
$ class: Factor w/ 133 levels "180","280","1082",..: 1 1 1 1 1 1 1 1 1 1 ...
$ GS : int 29 29 29 29 29 29 29 29 29 29 ...
$ SES : int 23 10 15 23 10 10 23 10 13 15 ...
$ COMB : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
#split資料分割,以dta$class切割成list
dta_class <- split(dta, dta$class)
# find the classes with over 30 pupils
pupils30<-lapply(dta_class, function(x) nrow(x)>30)
#把pupils30==TRUE的選出來
which(pupils30==TRUE) 5480 15580 15980 16180 18380
26 78 80 82 91
which(pupils30==TRUE)
par(mfrow=c(3,2 ), mar=c(3,3,2,2))
#畫dta_class圖
#只選column name="5480","15580","15980","16180","18380"
#hist(v,main,xlab,xlim,ylim,breaks,col,border)
lapply(dta_class[c("5480","15580","15980","16180","18380")],
function(x) {
hist(x$IQ,
main="histogram of IQ",
xlab="number of students",
breaks=20,
ylab="IQ")
#legend加上圖的描述
#bty圖例框是否畫出,y為畫出,default為n不畫出
legend('topright',
paste("class", x$class[1], sep="="),
bty='n')}
)$`5480`
$`5480`$rect
$`5480`$rect$w
[1] 2.821875
$`5480`$rect$h
[1] 2.540674
$`5480`$rect$left
[1] 15.47812
$`5480`$rect$top
[1] 6.24
$`5480`$text
$`5480`$text$x
[1] 16.16351
$`5480`$text$y
[1] 4.969663
$`15580`
$`15580`$rect
$`15580`$rect$w
[1] 2.835673
$`15580`$rect$h
[1] 2.117228
$`15580`$rect$left
[1] 12.44433
$`15580`$rect$top
[1] 5.2
$`15580`$text
$`15580`$text$x
[1] 13.08402
$`15580`$text$y
[1] 4.141386
$`15980`
$`15980`$rect
$`15980`$rect$w
[1] 3.24077
$`15980`$rect$h
[1] 2.117228
$`15980`$rect$left
[1] 14.57923
$`15980`$rect$top
[1] 5.2
$`15980`$text
$`15980`$text$x
[1] 15.31031
$`15980`$text$y
[1] 4.141386
$`16180`
$`16180`$rect
$`16180`$rect$w
[1] 3.645866
$`16180`$rect$h
[1] 2.96412
$`16180`$rect$left
[1] 13.71413
$`16180`$rect$top
[1] 7.28
$`16180`$text
$`16180`$text$x
[1] 14.5366
$`16180`$text$y
[1] 5.79794
$`18380`
$`18380`$rect
$`18380`$rect$w
[1] 3.645866
$`18380`$rect$h
[1] 2.96412
$`18380`$rect$left
[1] 11.71413
$`18380`$rect$top
[1] 7.28
$`18380`$text
$`18380`$text$x
[1] 12.5366
$`18380`$text$y
[1] 5.79794
Deaths per 100,000 from male suicides for 5 age groups and 15 countries are given in the table below. The data set is available as suicides2{HSAUR3}. Construct a side-by-side box plot of the data across different age groups and comment briefly.
library(HSAUR3)
library(tools)
#載入資料
dta<-suicides2
#15 obs. of 5 variables
str(dta)'data.frame': 15 obs. of 5 variables:
$ A25.34: num 22 9 22 29 16 28 48 7 8 26 ...
$ A35.44: num 27 19 19 40 25 35 65 8 11 29 ...
$ A45.54: num 31 10 21 52 36 41 84 11 18 36 ...
$ A55.64: num 34 14 31 53 47 49 81 18 20 32 ...
$ A65.74: num 24 27 49 69 56 52 107 27 28 28 ...
#改column name
colnames(dta)=c("age25-34","age35-44","age45-54", "age55-64","age65-64")
#wide to long
dta_long<-stack(dta)
colnames(dta_long)=c("deaths", "age")
head(dta_long) deaths age
1 22 age25-34
2 9 age25-34
3 22 age25-34
4 29 age25-34
5 16 age25-34
6 28 age25-34
par(mfrow=c(1, 1), mar=c(4,4,1,1))
#最陽春的畫法
plot(deaths ~ age,
data=dta_long,
frame=FALSE,
main="Deaths per 100,000 from male suicides",
xlab="Deaths",
ylab="Age",
cex.axis=0.6)## boxplot + stripchart
boxplot(deaths ~ age,
data=dta_long,
frame=F,
horizontal=T,
varwidth=T,
cex.axis=.6,
main="Deaths per 100,000 from male suicides",
xlab="Deaths",
ylab="Age")
#abline輔助線
#abline(a=NULL, b=NULL, h=NULL, v=NULL, …)
#a, b:它指定線的截距和斜率
#h:為水平線指定 y-value
#v:為垂直線指定 x-value(s)
abline(v=seq(20,100,20), lty=3, col='gray')
abline(v=mean(dta_long$deaths), lty=5, col='red')
#stripchart多疊一層圖
stripchart(deaths ~ age,
data=dta_long,
add=T,
col='darkgray', pch=1,
cex=.8,
method='stack')從圖示感覺上男性自殺率會隨著年紀增高
Use the dataset to replicate the plot below:
knitr::include_graphics("sat_gpa.png")#匯入資料
dta=read.table("d:/sat_gpa.txt",header=T)
head(dta) College SAT_No GPA_No SAT_Yes GPA_Yes
1 Barnard 1210 3.08 1317 3.30
2 Northwestern 1243 3.10 1333 3.24
3 Bowdoin 1200 2.85 1312 3.12
4 Colby 1220 2.90 1280 3.04
5 Carnegie Mellon 1237 2.70 1308 2.94
6 Georgia Tech 1233 2.62 1287 2.80
#6 obs. of 5 variables
str(dta)'data.frame': 6 obs. of 5 variables:
$ College: chr "Barnard" "Northwestern" "Bowdoin" "Colby" ...
$ SAT_No : int 1210 1243 1200 1220 1237 1233
$ GPA_No : num 3.08 3.1 2.85 2.9 2.7 2.62
$ SAT_Yes: int 1317 1333 1312 1280 1308 1287
$ GPA_Yes: num 3.3 3.24 3.12 3.04 2.94 2.8
plot(x=dta$SAT_No,
y=dta$GPA_No,
xlab="SAT",
ylab="GPA",
pch=16,
bty='o',
xlim=c(1150,1400),
ylim=c(2.6, 3.4)
)
# points圖再疊上剛剛的plot
points(x=dta$SAT_Yes,
y=dta$GPA_Yes,
xlab="SAT (V+M)",
ylab="GPA",
pch=1,
bty='o',
xlim=c(1150,1400),
ylim=c(2.6, 3.4),
panel.first=abline(h=seq(17, 23, 1),
col="grey80",
lty=3),
)
# 感覺語法沒錯,但是沒有線...
lines(dta$GPA_No,dta$GPA_Yes)
# segments也是沒有線 沒有線 沒有線!!!!
segments(dta$GPA_No, dta$SAT_No, dta$GPA_Yes, dta$SAT_Yes,
lty=1,
lwd=1,
col="black")# 參考昶濬及婉禎畫線的語法
plot(x=dta$SAT_No,
y=dta$GPA_No,
xlab="SAT (V+M)",
ylab="First Year GPA",
pch=16,
bty='o',
xlim=c(1150,1400),
ylim=c(2.6, 3.4)
)
points(x=dta$SAT_Yes,
y=dta$GPA_Yes,
pch=1,
bty='o',
xlim=c(1150,1400),
ylim=c(2.6, 3.4),
panel.first=abline(h=seq(17, 23, 1),
col="grey80",
lty=3),
)
#多了指定dta[c(3),]和dta[-c(3),]
with(dta[c(3),],
segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes,
lty=1,
lwd=2,
col="black")
)
with(dta[-c(3),],
segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes,
lty=1,
lwd=1,
col="black")
)
# 在圖上加字
with(dta[c(3),],
text(SAT_Yes+20, GPA_Yes,
label=College,
cex=0.8,
font=2,
srt=0)
)
with(dta[-c(3),],
text(SAT_Yes+30, GPA_Yes,
label=College,
cex=0.8,
srt=0))
# 畫axis,以length分割,labels default是TRUE
axis(1, seq(1150, 1400, length=16), labels = F)
axis(2, seq(2.6, 3.4, length=21), labels = F)The Law School Admission Test (LSAT) consists of a number of dichotomous items, which can be answered correctly or incorrecty. These items are constructed so that a person who scores high on the test is more likely to do well in a law school. The data set LSAT{ltm} contains answers by 1,000 persons to 5 test items. Generate a plot similar to the one below:
knitr::include_graphics("total score.png")library(ltm)
dta<-LSAT
str(dta)'data.frame': 1000 obs. of 5 variables:
$ Item 1: int 0 0 0 0 0 0 0 0 0 0 ...
$ Item 2: int 0 0 0 0 0 0 0 0 0 0 ...
$ Item 3: int 0 0 0 0 0 0 0 0 0 0 ...
$ Item 4: int 0 0 0 0 0 0 0 0 0 1 ...
$ Item 5: int 0 0 0 1 1 1 1 1 1 0 ...
head(dta) Item 1 Item 2 Item 3 Item 4 Item 5
1 0 0 0 0 0
2 0 0 0 0 0
3 0 0 0 0 0
4 0 0 0 0 1
5 0 0 0 0 1
6 0 0 0 0 1
Complete the R script to create the following plot
knitr::include_graphics("BMI.png")wt <- seq(40, 160, length.out=600)
# generate heights
ht <- seq(1.40, 2.00, length.out=600)
# cross wt by ht
wtht <- expand.grid(wt=wt, ht=ht)
bmi_wh <- function(w, h) {w/(h*h)}
str(bmi_wh) function (w, h)
- attr(*, "srcref")= 'srcref' int [1:8] 6 11 6 34 11 34 6 6
..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x0000000022c59fe8>
class(bmi_wh)[1] "function"
# height, weight and bmi
# generate weights
# 做一組數值,範圍40~160
# length.out變動範圍,固定個數,之間的差值則有所不同,且不一定為整數
# length.out=600,40~160中間切成600等份
wt <- seq(40, 160, length.out=600)
# generate heights
ht <- seq(1.40, 2.00, length.out=600)
# cross wt by ht
# expand.grid()R語言中的函數用於創建一個 DataFrame
wtht <- expand.grid(wt=wt, ht=ht)
# 'data.frame': 360000 obs(=600*600). of 2 variables(wt,ht)
str(wtht)'data.frame': 360000 obs. of 2 variables:
$ wt: num 40 40.2 40.4 40.6 40.8 ...
$ ht: num 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 ...
- attr(*, "out.attrs")=List of 2
..$ dim : Named int [1:2] 600 600
.. ..- attr(*, "names")= chr [1:2] "wt" "ht"
..$ dimnames:List of 2
.. ..$ wt: chr [1:600] "wt= 40.00000" "wt= 40.20033" "wt= 40.40067" "wt= 40.60100" ...
.. ..$ ht: chr [1:600] "ht=1.400000" "ht=1.401002" "ht=1.402003" "ht=1.403005" ...
# function to compute bmi from wt and ht
# 建立一個BIM公式的function
bmi_wh <- function(w, h) {w/(h*h)}
class(bmi_wh)[1] "function"
# 測試一下這個function
# 計算出來BMI=40
bmi_wh(90, 1.50)[1] 40
# generate data matrix
bmiwtht <- matrix(bmi_wh(wtht$wt, wtht$ht), length(ht), length(wt))
class(bmiwtht)[1] "matrix" "array"
str(bmiwtht) num [1:600, 1:600] 20.4 20.5 20.6 20.7 20.8 ...
# draw the contour
# contour輪廓圖
# levels= c(按照BMI分級)
# bty圖例框是否畫出,o為畫出,default為n不畫出
contour(wt, ht, bmiwtht, bty='n',
levels = c(18.5, 24.9, 30),
# drawlabels是線條的labels, default是TRUE
drawlabels=FALSE,
ylab="Height (m)",
xlab="Weight (kg)",
main="BMI categories by height and weight")
# add grid lines
grid()
# annotate the bmi categories
# 在圖上加字
# 在X軸105,Y軸1.8的位置寫"Obese",字體大小cex=1,字體角度srt=45度(=90垂直)
text(105, 1.8, "Obese", cex=1, srt=45)
text(92, 1.8, "Overweight", cex=1, srt=45)
text(75, 1.8, "Normal", cex=1, srt=45)
text(55, 1.8, "Underweight", cex=1, srt=55)
# 在圖上圖顏色(多邊形繪圖)
# X,Y軸對應的點=c(a,b,c,d)
# 對應4個點連成4角形
# col="塗滿顏色",border="邊線顏色"
polygon(x=c(60,120,160,160),
y=c(1.4, 2.0,2.0,1.4),
col="pink", border="red")這邊發現,polygon沒有給到對的邊界指令。
# 再試一次
bmiwtht <- matrix(bmi_wh(wtht$wt, wtht$ht), length(ht), length(wt))
class(bmiwtht)[1] "matrix" "array"
str(bmiwtht) num [1:600, 1:600] 20.4 20.5 20.6 20.7 20.8 ...
contour(wt, ht, bmiwtht, bty='n',
levels = c(18.5, 24.9, 30),
drawlabels=FALSE,
ylab="Height (m)",
xlab="Weight (kg)",
main="BMI categories by height and weight")
# add grid lines
grid()
# filled contour填滿功能
# 放在annotate the bmi categories會蓋到字
# levels從obese開始30~無限大(本來設n<-Inf,c(30, n)但無法成功
.filled.contour(wt, ht, bmiwtht,levels = c(30, 150), col= "pink")
# annotate the bmi categories(同上面的圖)
text(105, 1.8, "Obese", cex=1, srt=45)
text(92, 1.8, "Overweight", cex=1, srt=45)
text(75, 1.8, "Normal", cex=1, srt=45)
text(55, 1.8, "Underweight", cex=1, srt=55)照理說,polygon語法應該還是有解決的方法,但還沒想出來
Use the free recall data to replicate the figure.MURDOCK 1962:The format of the data in these files is serial position as a function of output position. Each row of data corresponds to one subject=trial. For example, if the data were: 20 19 18 15 1 5 8 that means that the subject recalled item 20, then item 19, etc. The code 88 means that the subject made an extra-list intrusion.
knitr::include_graphics("freecell.png")# 載入資料(有6個txt檔)
fL<-"D:/Murd62/fr10-2.txt"
# read.table(fL, sep="",header=FALSE)會出現錯誤碼如下,後來發現txt檔有空值
# Error in scan(file = file...: line 3 did not have 7 elements)
# 加上fill=T,帶入空值(NA)
dta10_2 <- read.table(fL, sep="",header=FALSE, fill=T)
#'data.frame': 1607 obs. of 7 variables
str(dta10_2)'data.frame': 1607 obs. of 7 variables:
$ V1: int 6 10 10 10 10 6 8 9 8 10 ...
$ V2: int 1 9 7 88 3 8 9 10 9 8 ...
$ V3: int 4 6 9 7 7 10 10 4 10 9 ...
$ V4: int 7 8 8 2 2 5 7 6 7 2 ...
$ V5: int 10 2 1 1 1 3 88 88 2 3 ...
$ V6: int 2 1 NA 5 9 88 4 8 NA NA ...
$ V7: int 8 88 NA NA NA NA NA 1 NA NA ...
#改column name
colnames(dta10_2)=c(1:7)
#新增group
dta10_2$Group<-as.factor("10-2")
str(dta10_2)'data.frame': 1607 obs. of 8 variables:
$ 1 : int 6 10 10 10 10 6 8 9 8 10 ...
$ 2 : int 1 9 7 88 3 8 9 10 9 8 ...
$ 3 : int 4 6 9 7 7 10 10 4 10 9 ...
$ 4 : int 7 8 8 2 2 5 7 6 7 2 ...
$ 5 : int 10 2 1 1 1 3 88 88 2 3 ...
$ 6 : int 2 1 NA 5 9 88 4 8 NA NA ...
$ 7 : int 8 88 NA NA NA NA NA 1 NA NA ...
$ Group: Factor w/ 1 level "10-2": 1 1 1 1 1 1 1 1 1 1 ...
#匯入其他資料fr15-2、fr20-1、fr20-2、fr30-1、fr40-1
fL<-"D:/Murd62/fr15-2.txt"
dta15_2 <- read.table(fL, sep="",header=FALSE, fill=T)
str(dta15_2)'data.frame': 2017 obs. of 7 variables:
$ V1: int 15 15 14 15 15 3 13 14 14 9 ...
$ V2: int 12 14 15 13 14 13 15 15 15 88 ...
$ V3: int 14 3 12 12 12 14 10 12 5 NA ...
$ V4: int 10 4 10 2 13 12 1 10 13 NA ...
$ V5: int 11 2 5 10 1 88 2 3 2 NA ...
$ V6: int NA 1 NA 6 88 2 3 NA 1 NA ...
$ V7: int NA NA NA 8 NA 5 4 NA 4 NA ...
colnames(dta15_2)=c(1:7)
dta15_2$Group<-as.factor("15-2")
fL<-"D:/Murd62/fr20-1.txt"
dta20_1 <- read.table(fL, sep="",header=FALSE, fill=T)
str(dta20_1)'data.frame': 1293 obs. of 10 variables:
$ V1 : int 20 20 17 18 18 18 19 20 19 20 ...
$ V2 : int 19 18 18 19 19 19 20 19 20 18 ...
$ V3 : int 13 16 8 20 20 20 18 17 88 12 ...
$ V4 : int 18 17 20 15 17 17 11 11 5 19 ...
$ V5 : int 1 3 15 16 12 1 17 15 17 NA ...
$ V6 : int 9 88 14 8 11 10 1 10 1 NA ...
$ V7 : int 2 14 1 9 13 7 2 1 NA NA ...
$ V8 : int 17 NA 2 1 88 NA NA NA NA NA ...
$ V9 : int 16 NA 19 2 NA NA NA NA NA NA ...
$ V10: int 88 NA NA NA NA NA NA NA NA NA ...
colnames(dta20_1)=c(1:10)
dta20_1$Group<-as.factor("20-1")
fL<-"D:/Murd62/fr20-2.txt"
dta20_2 <- read.table(fL, sep="",header=FALSE, fill=T)
colnames(dta20_2)=c(1:8)
dta20_2$Group<-as.factor("20-2")
fL<-"D:/Murd62/fr30-1.txt"
dta30_1 <- read.table(fL, sep="",header=FALSE, fill=T)
str(dta30_1)'data.frame': 1761 obs. of 9 variables:
$ V1: int 30 1 31 30 1 30 30 28 30 30 ...
$ V2: int 27 4 29 28 30 28 29 29 29 29 ...
$ V3: int 6 29 27 29 28 29 9 30 28 19 ...
$ V4: int 29 30 23 27 27 7 13 27 26 88 ...
$ V5: int 15 26 17 21 22 25 11 10 27 88 ...
$ V6: int 23 25 14 88 18 12 NA 13 17 23 ...
$ V7: int 8 88 16 19 25 18 NA NA NA 11 ...
$ V8: int NA NA NA 26 24 88 NA NA NA NA ...
$ V9: int NA NA NA NA 16 NA NA NA NA NA ...
colnames(dta30_1)=c(1:9)
dta30_1$Group<-as.factor("30-1")
fL<-"D:/Murd62/fr40-1.txt"
dta40_1 <- read.table(fL, sep="",header=FALSE, fill=T)
colnames(dta40_1)=c(1:10)
dta40_1$Group<-as.factor("40-1")(Optional) A dataset, raz2005{rmcorr}, contains two repeated measures, on two occasions (Time), of age (in years) and adjusted volume of cerebellar hemispheres from 72 participants. Use it to replicate the following figure in which participants older than 65 years of age at Time One are plotted on a shifted panel and those whose cerebellar volume increased from Time One to Time Two are shown in blue color.
library(rmcorr)
library(tidyverse)
dta<-raz2005
str(dta)'data.frame': 144 obs. of 4 variables:
$ Participant: int 1 1 2 2 3 3 4 4 5 5 ...
$ Time : int 1 2 1 2 1 2 1 2 1 2 ...
$ Age : num 24 29.3 21 25.9 23.9 ...
$ Volume : num 168 158 164 152 150 ...
head(dta) Participant Time Age Volume
1 1 1 24.04637 168.0261
2 1 2 29.28124 157.5085
3 2 1 21.02195 164.0444
4 2 2 25.91324 152.3567
5 3 1 23.92362 150.4820
6 3 2 29.57359 146.3979
# long to wide data
# 按照Time 1或Time 2,分配Age和volume
# 使用tidyverse %>%功能
dta_l<-dta %>%
pivot_wider(names_from = "Time",
values_from = c("Age", "Volume"))
head(dta_l) |> knitr::kable()| Participant | Age_1 | Age_2 | Volume_1 | Volume_2 |
|---|---|---|---|---|
| 1 | 24.04637 | 29.28124 | 168.0261 | 157.5085 |
| 2 | 21.02195 | 25.91324 | 164.0444 | 152.3567 |
| 3 | 23.92362 | 29.57359 | 150.4820 | 146.3979 |
| 4 | 19.96398 | 25.96329 | 139.9491 | 135.0468 |
| 5 | 38.90674 | 44.13654 | 158.4602 | 149.6970 |
| 6 | 76.90995 | 81.80462 | 154.7810 | 141.9237 |
dta_l$count=dta_l$Volume_2-dta_l$Volume_1
summary(dta_l) Participant Age_1 Age_2 Volume_1
Min. : 1.00 Min. :19.96 Min. :25.91 Min. :109.6
1st Qu.:18.75 1st Qu.:43.08 1st Qu.:48.49 1st Qu.:126.2
Median :36.50 Median :54.96 Median :58.99 Median :136.2
Mean :36.50 Mean :52.80 Mean :57.80 Mean :135.2
3rd Qu.:54.25 3rd Qu.:63.25 3rd Qu.:68.57 3rd Qu.:143.6
Max. :72.00 Max. :79.24 Max. :81.93 Max. :168.0
Volume_2 count
Min. :105.3 Min. :-12.857
1st Qu.:121.7 1st Qu.: -5.669
Median :132.2 Median : -3.909
Mean :131.1 Mean : -4.086
3rd Qu.:139.7 3rd Qu.: -1.998
Max. :157.5 Max. : 10.770
str(dta_l)tibble [72 x 6] (S3: tbl_df/tbl/data.frame)
$ Participant: int [1:72] 1 2 3 4 5 6 7 8 9 10 ...
$ Age_1 : num [1:72] 24 21 23.9 20 38.9 ...
$ Age_2 : num [1:72] 29.3 25.9 29.6 26 44.1 ...
$ Volume_1 : num [1:72] 168 164 150 140 158 ...
$ Volume_2 : num [1:72] 158 152 146 135 150 ...
$ count : num [1:72] -10.52 -11.69 -4.08 -4.9 -8.76 ...
# find the dta_1$count time2-time1>0的,表示有變好
better<-lapply(dta_l$count, function(x) nrow(x)>1)
# 結果竟然找不到??
# 必須要想別的方法
which(better==TRUE)integer(0)
不了解which(better==TRUE)為何會變成integer(0)…
#參考婉禎的做法
better<-dta_l$Volume_1 < dta_l$Volume_2
which(better==TRUE)[1] 18 22 29 36 44 62 63
with(dta_l, plot(Age_1, Volume_1,
xlab="Age",
ylab="Brain volume",
pch = 20,
cex = 1,
bty='o',
xlim=c(18, 88),
ylim=c(100, 200),
main="Age with Brain Volume"))
with(dta_l, points(Age_2, Volume_2,
xlab="Age",
ylab="Brain volume",
pch = 20,
cex = 1,
bty='o',
xlim=c(18, 88),
ylim=c(100, 200),
main="Age with Brain Volume"))
with(dta_l, arrows(Age_1,
Volume_1,
Age_2,
Volume_2,
length=.08,
lty=1,
cex=0.5))arrows(Age_1,Volume_1,Age_2,Volume_2,length=.08,lty=1,cex=0.5,col = “lightblue”))