Inclass exercise

inclass 1

The R script illustrates how to implement ‘small multiples’ in base graphics given the 4 different diets of the ChickWeight{datasets} example.

#載入資料
dta <- ChickWeight
#578個觀察值，4個variables
str(dta)

Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame':  578 obs. of  4 variables:
 $ weight: num  42 51 59 64 76 93 106 125 149 171 ...
 $ Time  : num  0 2 4 6 8 10 12 14 16 18 ...
 $ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
 $ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
 - attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
  .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
 - attr(*, "outer")=Class 'formula'  language ~Diet
  .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
 - attr(*, "labels")=List of 2
  ..$ x: chr "Time"
  ..$ y: chr "Body weight"
 - attr(*, "units")=List of 2
  ..$ x: chr "(days)"
  ..$ y: chr "(gm)"

#split資料分割，以dta$Diet(4個variables)切割成4個list
dta_diet <- split(dta, dta$Diet)
str(dta_diet)

List of 4
 $ 1:Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 220 obs. of  4 variables:
  ..$ weight: num [1:220] 42 51 59 64 76 93 106 125 149 171 ...
  ..$ Time  : num [1:220] 0 2 4 6 8 10 12 14 16 18 ...
  ..$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 15 15 15 15 15 15 15 15 15 15 ...
  ..$ Diet  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
  ..- attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
  .. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
  ..- attr(*, "outer")=Class 'formula'  language ~Diet
  .. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
  ..- attr(*, "labels")=List of 2
  .. ..$ x: chr "Time"
  .. ..$ y: chr "Body weight"
  ..- attr(*, "units")=List of 2
  .. ..$ x: chr "(days)"
  .. ..$ y: chr "(gm)"
 $ 2:Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 120 obs. of  4 variables:
  ..$ weight: num [1:120] 40 50 62 86 125 163 217 240 275 307 ...
  ..$ Time  : num [1:120] 0 2 4 6 8 10 12 14 16 18 ...
  ..$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 30 30 30 30 30 30 30 30 30 30 ...
  ..$ Diet  : Factor w/ 4 levels "1","2","3","4": 2 2 2 2 2 2 2 2 2 2 ...
  ..- attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
  .. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
  ..- attr(*, "outer")=Class 'formula'  language ~Diet
  .. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
  ..- attr(*, "labels")=List of 2
  .. ..$ x: chr "Time"
  .. ..$ y: chr "Body weight"
  ..- attr(*, "units")=List of 2
  .. ..$ x: chr "(days)"
  .. ..$ y: chr "(gm)"
 $ 3:Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 120 obs. of  4 variables:
  ..$ weight: num [1:120] 42 53 62 73 85 102 123 138 170 204 ...
  ..$ Time  : num [1:120] 0 2 4 6 8 10 12 14 16 18 ...
  ..$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 34 34 34 34 34 34 34 34 34 34 ...
  ..$ Diet  : Factor w/ 4 levels "1","2","3","4": 3 3 3 3 3 3 3 3 3 3 ...
  ..- attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
  .. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
  ..- attr(*, "outer")=Class 'formula'  language ~Diet
  .. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
  ..- attr(*, "labels")=List of 2
  .. ..$ x: chr "Time"
  .. ..$ y: chr "Body weight"
  ..- attr(*, "units")=List of 2
  .. ..$ x: chr "(days)"
  .. ..$ y: chr "(gm)"
 $ 4:Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 118 obs. of  4 variables:
  ..$ weight: num [1:118] 42 51 66 85 103 124 155 153 175 184 ...
  ..$ Time  : num [1:118] 0 2 4 6 8 10 12 14 16 18 ...
  ..$ Chick : Ord.factor w/ 50 levels "18"<"16"<"15"<..: 44 44 44 44 44 44 44 44 44 44 ...
  ..$ Diet  : Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 4 4 4 4 4 ...
  ..- attr(*, "formula")=Class 'formula'  language weight ~ Time | Chick
  .. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
  ..- attr(*, "outer")=Class 'formula'  language ~Diet
  .. .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv> 
  ..- attr(*, "labels")=List of 2
  .. ..$ x: chr "Time"
  .. ..$ y: chr "Body weight"
  ..- attr(*, "units")=List of 2
  .. ..$ x: chr "(days)"
  .. ..$ y: chr "(gm)"

#mfcol, mfrow 設置小圖數量與位置，取值為數值型向量c(nr,nc)形式
#表示把圖分為nr行nc列個小圖，圖形順序按列排（mfcol）或按行排（mfrow）
#The default of mfrow or mfcol is c(1,1)
#mar 設置圖形空白邊界行數
#mar=c(底,左,上,右) 圖的四個邊上指定的邊距線數
#The default of mar is c(5.1, 4.1, 4.1, 2.1)  打par("mar")可出現
par(mfrow=c(2, 2), mar=c(4,4,0,0))

#畫dta_diet圖
lapply(dta_diet,
       function(x) {
         plot(x$weight ~ x$Time, 
              xlab="Time (day)", 
              ylab="Weight (gm)")
#legend加上圖的描述
#bty圖例框是否畫出，y為畫出，default為n不畫出
         legend('topleft', 
                paste("Diet", x$Diet[1], sep="="),
                bty='n')}
       )

$`1`
$`1`$rect
$`1`$rect$w
[1] 7.067274

$`1`$rect$h
[1] 72.46349

$`1`$rect$left
[1] -0.84

$`1`$rect$top
[1] 315.8


$`1`$text
$`1`$text$x
[1] 1.577517

$`1`$text$y
[1] 279.5683



$`2`
$`2`$rect
$`2`$rect$w
[1] 7.067274

$`2`$rect$h
[1] 78.36792

$`2`$rect$left
[1] -0.84

$`2`$rect$top
[1] 342.68


$`2`$text
$`2`$text$x
[1] 1.577517

$`2`$text$y
[1] 303.496



$`3`
$`3`$rect
$`3`$rect$w
[1] 7.067274

$`3`$rect$h
[1] 89.64002

$`3`$rect$left
[1] -0.84

$`3`$rect$top
[1] 386.36


$`3`$text
$`3`$text$x
[1] 1.577517

$`3`$text$y
[1] 341.54



$`4`
$`4`$rect
$`4`$rect$w
[1] 7.067274

$`4`$rect$h
[1] 75.95247

$`4`$rect$left
[1] -0.84

$`4`$rect$top
[1] 333.32


$`4`$text
$`4`$text$x
[1] 1.577517

$`4`$text$y
[1] 295.3438

Adapt the script to produce a plot of 5 panels in which each panel shows a histogram of IQ for each of 5 classes with over 30 pupils in the nlschools{MASS} dataset.

#載入資料
library(MASS)
dta <- nlschools
#2287個觀察值，6個variables
str(dta)

'data.frame':   2287 obs. of  6 variables:
 $ lang : int  46 45 33 46 20 30 30 57 36 36 ...
 $ IQ   : num  15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
 $ class: Factor w/ 133 levels "180","280","1082",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ GS   : int  29 29 29 29 29 29 29 29 29 29 ...
 $ SES  : int  23 10 15 23 10 10 23 10 13 15 ...
 $ COMB : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...

#split資料分割，以dta$class切割成list
dta_class <- split(dta, dta$class)
# find the classes with over 30 pupils
pupils30<-lapply(dta_class, function(x) nrow(x)>30) 
#把pupils30==TRUE的選出來
which(pupils30==TRUE)

 5480 15580 15980 16180 18380 
   26    78    80    82    91

which(pupils30==TRUE)

par(mfrow=c(3,2 ), mar=c(3,3,2,2))

#畫dta_class圖
#只選column name="5480","15580","15980","16180","18380"
#hist(v,main,xlab,xlim,ylim,breaks,col,border)
lapply(dta_class[c("5480","15580","15980","16180","18380")],
       function(x) {
         hist(x$IQ,
              main="histogram of IQ",
              xlab="number of students", 
              breaks=20,
              ylab="IQ")
#legend加上圖的描述
#bty圖例框是否畫出，y為畫出，default為n不畫出
         legend('topright', 
                paste("class", x$class[1], sep="="),
                bty='n')}
       )

$`5480`
$`5480`$rect
$`5480`$rect$w
[1] 2.821875

$`5480`$rect$h
[1] 2.540674

$`5480`$rect$left
[1] 15.47812

$`5480`$rect$top
[1] 6.24


$`5480`$text
$`5480`$text$x
[1] 16.16351

$`5480`$text$y
[1] 4.969663



$`15580`
$`15580`$rect
$`15580`$rect$w
[1] 2.835673

$`15580`$rect$h
[1] 2.117228

$`15580`$rect$left
[1] 12.44433

$`15580`$rect$top
[1] 5.2


$`15580`$text
$`15580`$text$x
[1] 13.08402

$`15580`$text$y
[1] 4.141386



$`15980`
$`15980`$rect
$`15980`$rect$w
[1] 3.24077

$`15980`$rect$h
[1] 2.117228

$`15980`$rect$left
[1] 14.57923

$`15980`$rect$top
[1] 5.2


$`15980`$text
$`15980`$text$x
[1] 15.31031

$`15980`$text$y
[1] 4.141386



$`16180`
$`16180`$rect
$`16180`$rect$w
[1] 3.645866

$`16180`$rect$h
[1] 2.96412

$`16180`$rect$left
[1] 13.71413

$`16180`$rect$top
[1] 7.28


$`16180`$text
$`16180`$text$x
[1] 14.5366

$`16180`$text$y
[1] 5.79794



$`18380`
$`18380`$rect
$`18380`$rect$w
[1] 3.645866

$`18380`$rect$h
[1] 2.96412

$`18380`$rect$left
[1] 11.71413

$`18380`$rect$top
[1] 7.28


$`18380`$text
$`18380`$text$x
[1] 12.5366

$`18380`$text$y
[1] 5.79794

inclass 2

Deaths per 100,000 from male suicides for 5 age groups and 15 countries are given in the table below. The data set is available as suicides2{HSAUR3}. Construct a side-by-side box plot of the data across different age groups and comment briefly.

library(HSAUR3)
library(tools)
#載入資料
dta<-suicides2
#15 obs. of  5 variables
str(dta)

'data.frame':   15 obs. of  5 variables:
 $ A25.34: num  22 9 22 29 16 28 48 7 8 26 ...
 $ A35.44: num  27 19 19 40 25 35 65 8 11 29 ...
 $ A45.54: num  31 10 21 52 36 41 84 11 18 36 ...
 $ A55.64: num  34 14 31 53 47 49 81 18 20 32 ...
 $ A65.74: num  24 27 49 69 56 52 107 27 28 28 ...

#改column name
colnames(dta)=c("age25-34","age35-44","age45-54", "age55-64","age65-64")
#wide to long
dta_long<-stack(dta)
colnames(dta_long)=c("deaths", "age")
head(dta_long)

  deaths      age
1     22 age25-34
2      9 age25-34
3     22 age25-34
4     29 age25-34
5     16 age25-34
6     28 age25-34

par(mfrow=c(1, 1), mar=c(4,4,1,1))

#最陽春的畫法
plot(deaths ~ age,
     data=dta_long, 
     frame=FALSE,
     main="Deaths per 100,000 from male suicides",
     xlab="Deaths",
     ylab="Age",
     cex.axis=0.6)

## boxplot + stripchart
boxplot(deaths ~ age,
        data=dta_long,
        frame=F,
        horizontal=T,
        varwidth=T,
        cex.axis=.6,
        main="Deaths per 100,000 from male suicides",
        xlab="Deaths",
        ylab="Age")
#abline輔助線
#abline(a=NULL, b=NULL, h=NULL, v=NULL, …)
#a, b:它指定線的截距和斜率
#h:為水平線指定 y-value
#v:為垂直線指定 x-value(s)
abline(v=seq(20,100,20), lty=3, col='gray')
abline(v=mean(dta_long$deaths), lty=5, col='red')
#stripchart多疊一層圖
stripchart(deaths ~ age,
           data=dta_long,
           add=T,
           col='darkgray', pch=1,
           cex=.8,
           method='stack')

從圖示感覺上男性自殺率會隨著年紀增高

inclass 3

Use the dataset to replicate the plot below:

knitr::include_graphics("sat_gpa.png")

#匯入資料
dta=read.table("d:/sat_gpa.txt",header=T) 
head(dta)

          College SAT_No GPA_No SAT_Yes GPA_Yes
1         Barnard   1210   3.08    1317    3.30
2    Northwestern   1243   3.10    1333    3.24
3         Bowdoin   1200   2.85    1312    3.12
4           Colby   1220   2.90    1280    3.04
5 Carnegie Mellon   1237   2.70    1308    2.94
6    Georgia Tech   1233   2.62    1287    2.80

#6 obs. of  5 variables
str(dta)

'data.frame':   6 obs. of  5 variables:
 $ College: chr  "Barnard" "Northwestern" "Bowdoin" "Colby" ...
 $ SAT_No : int  1210 1243 1200 1220 1237 1233
 $ GPA_No : num  3.08 3.1 2.85 2.9 2.7 2.62
 $ SAT_Yes: int  1317 1333 1312 1280 1308 1287
 $ GPA_Yes: num  3.3 3.24 3.12 3.04 2.94 2.8

plot(x=dta$SAT_No,     
     y=dta$GPA_No,       
     xlab="SAT",       
     ylab="GPA",
     pch=16, 
     bty='o',
     xlim=c(1150,1400),
     ylim=c(2.6, 3.4) 
     )
# points圖再疊上剛剛的plot
points(x=dta$SAT_Yes,     
     y=dta$GPA_Yes,       
     xlab="SAT (V+M)",       
     ylab="GPA",
     pch=1, 
     bty='o',
     xlim=c(1150,1400),
     ylim=c(2.6, 3.4),
     panel.first=abline(h=seq(17, 23, 1), 
                                     col="grey80", 
                                     lty=3),
     )
# 感覺語法沒錯，但是沒有線...
lines(dta$GPA_No,dta$GPA_Yes)
# segments也是沒有線 沒有線 沒有線!!!!
segments(dta$GPA_No, dta$SAT_No, dta$GPA_Yes, dta$SAT_Yes,
         lty=1, 
         lwd=1,  
         col="black")

# 參考昶濬及婉禎畫線的語法
plot(x=dta$SAT_No,     
     y=dta$GPA_No,       
     xlab="SAT (V+M)",       
     ylab="First Year GPA",
     pch=16, 
     bty='o',
     xlim=c(1150,1400),
     ylim=c(2.6, 3.4) 
     )
points(x=dta$SAT_Yes,     
     y=dta$GPA_Yes,       
     pch=1, 
     bty='o',
     xlim=c(1150,1400),
     ylim=c(2.6, 3.4),
     panel.first=abline(h=seq(17, 23, 1), 
                                     col="grey80", 
                                     lty=3),
     )
#多了指定dta[c(3),]和dta[-c(3),]
with(dta[c(3),], 
     segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, 
              lty=1, 
              lwd=2,  
              col="black")
     )
with(dta[-c(3),], 
     segments(SAT_No, GPA_No, SAT_Yes, GPA_Yes, 
              lty=1, 
              lwd=1,  
              col="black")
     )
# 在圖上加字
with(dta[c(3),],
     text(SAT_Yes+20, GPA_Yes,
          label=College, 
          cex=0.8, 
          font=2,
          srt=0) 
     )
with(dta[-c(3),],
     text(SAT_Yes+30, GPA_Yes,
          label=College, 
          cex=0.8, 
          srt=0))
# 畫axis，以length分割，labels default是TRUE
axis(1, seq(1150, 1400, length=16), labels = F)
axis(2, seq(2.6, 3.4, length=21), labels = F)

Exercise 1 (未完成)

The Law School Admission Test (LSAT) consists of a number of dichotomous items, which can be answered correctly or incorrecty. These items are constructed so that a person who scores high on the test is more likely to do well in a law school. The data set LSAT{ltm} contains answers by 1,000 persons to 5 test items. Generate a plot similar to the one below:

knitr::include_graphics("total score.png")

library(ltm)
dta<-LSAT
str(dta)

'data.frame':   1000 obs. of  5 variables:
 $ Item 1: int  0 0 0 0 0 0 0 0 0 0 ...
 $ Item 2: int  0 0 0 0 0 0 0 0 0 0 ...
 $ Item 3: int  0 0 0 0 0 0 0 0 0 0 ...
 $ Item 4: int  0 0 0 0 0 0 0 0 0 1 ...
 $ Item 5: int  0 0 0 1 1 1 1 1 1 0 ...

head(dta)

  Item 1 Item 2 Item 3 Item 4 Item 5
1      0      0      0      0      0
2      0      0      0      0      0
3      0      0      0      0      0
4      0      0      0      0      1
5      0      0      0      0      1
6      0      0      0      0      1

Exercise 2

Complete the R script to create the following plot

knitr::include_graphics("BMI.png")

wt <- seq(40, 160, length.out=600)
# generate heights
ht <- seq(1.40, 2.00, length.out=600)
# cross wt by ht
wtht <- expand.grid(wt=wt, ht=ht)
bmi_wh <- function(w, h) {w/(h*h)}
str(bmi_wh)

function (w, h)  
 - attr(*, "srcref")= 'srcref' int [1:8] 6 11 6 34 11 34 6 6
  ..- attr(*, "srcfile")=Classes 'srcfilecopy', 'srcfile' <environment: 0x0000000022c59fe8>

class(bmi_wh)

[1] "function"

# height, weight and bmi
# generate weights
# 做一組數值，範圍40~160
# length.out變動範圍，固定個數，之間的差值則有所不同，且不一定為整數
# length.out=600，40~160中間切成600等份
wt <- seq(40, 160, length.out=600)
# generate heights
ht <- seq(1.40, 2.00, length.out=600)
# cross wt by ht
# expand.grid()R語言中的函數用於創建一個 DataFrame 
wtht <- expand.grid(wt=wt, ht=ht)
# 'data.frame': 360000 obs(=600*600). of  2 variables(wt,ht) 
str(wtht)

'data.frame':   360000 obs. of  2 variables:
 $ wt: num  40 40.2 40.4 40.6 40.8 ...
 $ ht: num  1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 1.4 ...
 - attr(*, "out.attrs")=List of 2
  ..$ dim     : Named int [1:2] 600 600
  .. ..- attr(*, "names")= chr [1:2] "wt" "ht"
  ..$ dimnames:List of 2
  .. ..$ wt: chr [1:600] "wt= 40.00000" "wt= 40.20033" "wt= 40.40067" "wt= 40.60100" ...
  .. ..$ ht: chr [1:600] "ht=1.400000" "ht=1.401002" "ht=1.402003" "ht=1.403005" ...

# function to compute bmi from wt and ht
# 建立一個BIM公式的function
bmi_wh <- function(w, h) {w/(h*h)}
class(bmi_wh)

[1] "function"

# 測試一下這個function
# 計算出來BMI=40
bmi_wh(90, 1.50)

[1] 40

# generate data matrix
bmiwtht <- matrix(bmi_wh(wtht$wt, wtht$ht), length(ht), length(wt))
class(bmiwtht)

[1] "matrix" "array"

str(bmiwtht)

 num [1:600, 1:600] 20.4 20.5 20.6 20.7 20.8 ...

# draw the contour
# contour輪廓圖
# levels= c(按照BMI分級)
# bty圖例框是否畫出，o為畫出，default為n不畫出
contour(wt, ht, bmiwtht, bty='n', 
        levels = c(18.5, 24.9, 30), 
# drawlabels是線條的labels, default是TRUE
        drawlabels=FALSE,    
        ylab="Height (m)",
        xlab="Weight (kg)",
        main="BMI categories by height and weight")

# add grid lines
grid()


# annotate the bmi categories

# 在圖上加字
# 在X軸105,Y軸1.8的位置寫"Obese"，字體大小cex=1，字體角度srt=45度(=90垂直)
text(105, 1.8, "Obese", cex=1, srt=45)
text(92, 1.8, "Overweight", cex=1, srt=45)
text(75, 1.8, "Normal", cex=1, srt=45)
text(55, 1.8, "Underweight", cex=1, srt=55)

# 在圖上圖顏色(多邊形繪圖)
# X,Y軸對應的點=c(a,b,c,d)
# 對應4個點連成4角形
# col="塗滿顏色"，border="邊線顏色"
polygon(x=c(60,120,160,160),
        y=c(1.4, 2.0,2.0,1.4),
        col="pink", border="red")

這邊發現，polygon沒有給到對的邊界指令。

# 再試一次
bmiwtht <- matrix(bmi_wh(wtht$wt, wtht$ht), length(ht), length(wt))
class(bmiwtht)

[1] "matrix" "array"

str(bmiwtht)

 num [1:600, 1:600] 20.4 20.5 20.6 20.7 20.8 ...

contour(wt, ht, bmiwtht, bty='n', 
        levels = c(18.5, 24.9, 30), 
        drawlabels=FALSE,    
        ylab="Height (m)",
        xlab="Weight (kg)",
        main="BMI categories by height and weight")

# add grid lines
grid()

# filled contour填滿功能
# 放在annotate the bmi categories會蓋到字
# levels從obese開始30~無限大(本來設n<-Inf，c(30, n)但無法成功
.filled.contour(wt, ht, bmiwtht,levels = c(30, 150), col= "pink")

# annotate the bmi categories(同上面的圖)
text(105, 1.8, "Obese", cex=1, srt=45)
text(92, 1.8, "Overweight", cex=1, srt=45)
text(75, 1.8, "Normal", cex=1, srt=45)
text(55, 1.8, "Underweight", cex=1, srt=55)

照理說，polygon語法應該還是有解決的方法，但還沒想出來

Exercise 3(未完成)

Use the free recall data to replicate the figure.MURDOCK 1962:The format of the data in these files is serial position as a function of output position. Each row of data corresponds to one subject=trial. For example, if the data were: 20 19 18 15 1 5 8 that means that the subject recalled item 20, then item 19, etc. The code 88 means that the subject made an extra-list intrusion.

knitr::include_graphics("freecell.png")

# 載入資料(有6個txt檔)
fL<-"D:/Murd62/fr10-2.txt"
# read.table(fL, sep="",header=FALSE)會出現錯誤碼如下，後來發現txt檔有空值
# Error in scan(file = file...: line 3 did not have 7 elements)
# 加上fill=T，帶入空值(NA)
dta10_2 <- read.table(fL, sep="",header=FALSE, fill=T)
#'data.frame':  1607 obs. of  7 variables
str(dta10_2)

'data.frame':   1607 obs. of  7 variables:
 $ V1: int  6 10 10 10 10 6 8 9 8 10 ...
 $ V2: int  1 9 7 88 3 8 9 10 9 8 ...
 $ V3: int  4 6 9 7 7 10 10 4 10 9 ...
 $ V4: int  7 8 8 2 2 5 7 6 7 2 ...
 $ V5: int  10 2 1 1 1 3 88 88 2 3 ...
 $ V6: int  2 1 NA 5 9 88 4 8 NA NA ...
 $ V7: int  8 88 NA NA NA NA NA 1 NA NA ...

#改column name
colnames(dta10_2)=c(1:7)
#新增group
dta10_2$Group<-as.factor("10-2")
str(dta10_2)

'data.frame':   1607 obs. of  8 variables:
 $ 1    : int  6 10 10 10 10 6 8 9 8 10 ...
 $ 2    : int  1 9 7 88 3 8 9 10 9 8 ...
 $ 3    : int  4 6 9 7 7 10 10 4 10 9 ...
 $ 4    : int  7 8 8 2 2 5 7 6 7 2 ...
 $ 5    : int  10 2 1 1 1 3 88 88 2 3 ...
 $ 6    : int  2 1 NA 5 9 88 4 8 NA NA ...
 $ 7    : int  8 88 NA NA NA NA NA 1 NA NA ...
 $ Group: Factor w/ 1 level "10-2": 1 1 1 1 1 1 1 1 1 1 ...

#匯入其他資料fr15-2、fr20-1、fr20-2、fr30-1、fr40-1
fL<-"D:/Murd62/fr15-2.txt"
dta15_2 <- read.table(fL, sep="",header=FALSE, fill=T)
str(dta15_2)

'data.frame':   2017 obs. of  7 variables:
 $ V1: int  15 15 14 15 15 3 13 14 14 9 ...
 $ V2: int  12 14 15 13 14 13 15 15 15 88 ...
 $ V3: int  14 3 12 12 12 14 10 12 5 NA ...
 $ V4: int  10 4 10 2 13 12 1 10 13 NA ...
 $ V5: int  11 2 5 10 1 88 2 3 2 NA ...
 $ V6: int  NA 1 NA 6 88 2 3 NA 1 NA ...
 $ V7: int  NA NA NA 8 NA 5 4 NA 4 NA ...

colnames(dta15_2)=c(1:7)
dta15_2$Group<-as.factor("15-2")

fL<-"D:/Murd62/fr20-1.txt"
dta20_1 <- read.table(fL, sep="",header=FALSE, fill=T)
str(dta20_1)

'data.frame':   1293 obs. of  10 variables:
 $ V1 : int  20 20 17 18 18 18 19 20 19 20 ...
 $ V2 : int  19 18 18 19 19 19 20 19 20 18 ...
 $ V3 : int  13 16 8 20 20 20 18 17 88 12 ...
 $ V4 : int  18 17 20 15 17 17 11 11 5 19 ...
 $ V5 : int  1 3 15 16 12 1 17 15 17 NA ...
 $ V6 : int  9 88 14 8 11 10 1 10 1 NA ...
 $ V7 : int  2 14 1 9 13 7 2 1 NA NA ...
 $ V8 : int  17 NA 2 1 88 NA NA NA NA NA ...
 $ V9 : int  16 NA 19 2 NA NA NA NA NA NA ...
 $ V10: int  88 NA NA NA NA NA NA NA NA NA ...

colnames(dta20_1)=c(1:10)
dta20_1$Group<-as.factor("20-1")

fL<-"D:/Murd62/fr20-2.txt"
dta20_2 <- read.table(fL, sep="",header=FALSE, fill=T)
colnames(dta20_2)=c(1:8)
dta20_2$Group<-as.factor("20-2")

fL<-"D:/Murd62/fr30-1.txt"
dta30_1 <- read.table(fL, sep="",header=FALSE, fill=T)
str(dta30_1)

'data.frame':   1761 obs. of  9 variables:
 $ V1: int  30 1 31 30 1 30 30 28 30 30 ...
 $ V2: int  27 4 29 28 30 28 29 29 29 29 ...
 $ V3: int  6 29 27 29 28 29 9 30 28 19 ...
 $ V4: int  29 30 23 27 27 7 13 27 26 88 ...
 $ V5: int  15 26 17 21 22 25 11 10 27 88 ...
 $ V6: int  23 25 14 88 18 12 NA 13 17 23 ...
 $ V7: int  8 88 16 19 25 18 NA NA NA 11 ...
 $ V8: int  NA NA NA 26 24 88 NA NA NA NA ...
 $ V9: int  NA NA NA NA 16 NA NA NA NA NA ...

colnames(dta30_1)=c(1:9)
dta30_1$Group<-as.factor("30-1")

fL<-"D:/Murd62/fr40-1.txt"
dta40_1 <- read.table(fL, sep="",header=FALSE, fill=T)
colnames(dta40_1)=c(1:10)
dta40_1$Group<-as.factor("40-1")

Exercise 4(幾乎快，差找出藍箭頭)

(Optional) A dataset, raz2005{rmcorr}, contains two repeated measures, on two occasions (Time), of age (in years) and adjusted volume of cerebellar hemispheres from 72 participants. Use it to replicate the following figure in which participants older than 65 years of age at Time One are plotted on a shifted panel and those whose cerebellar volume increased from Time One to Time Two are shown in blue color.

library(rmcorr)
library(tidyverse)
dta<-raz2005
str(dta)

'data.frame':   144 obs. of  4 variables:
 $ Participant: int  1 1 2 2 3 3 4 4 5 5 ...
 $ Time       : int  1 2 1 2 1 2 1 2 1 2 ...
 $ Age        : num  24 29.3 21 25.9 23.9 ...
 $ Volume     : num  168 158 164 152 150 ...

head(dta)

  Participant Time      Age   Volume
1           1    1 24.04637 168.0261
2           1    2 29.28124 157.5085
3           2    1 21.02195 164.0444
4           2    2 25.91324 152.3567
5           3    1 23.92362 150.4820
6           3    2 29.57359 146.3979

# long to wide data
# 按照Time 1或Time 2，分配Age和volume
# 使用tidyverse %>%功能
dta_l<-dta %>% 
  pivot_wider(names_from = "Time",
              values_from = c("Age", "Volume")) 
head(dta_l) |> knitr::kable()

Participant	Age_1	Age_2	Volume_1	Volume_2
1	24.04637	29.28124	168.0261	157.5085
2	21.02195	25.91324	164.0444	152.3567
3	23.92362	29.57359	150.4820	146.3979
4	19.96398	25.96329	139.9491	135.0468
5	38.90674	44.13654	158.4602	149.6970
6	76.90995	81.80462	154.7810	141.9237

dta_l$count=dta_l$Volume_2-dta_l$Volume_1
summary(dta_l)

  Participant        Age_1           Age_2          Volume_1    
 Min.   : 1.00   Min.   :19.96   Min.   :25.91   Min.   :109.6  
 1st Qu.:18.75   1st Qu.:43.08   1st Qu.:48.49   1st Qu.:126.2  
 Median :36.50   Median :54.96   Median :58.99   Median :136.2  
 Mean   :36.50   Mean   :52.80   Mean   :57.80   Mean   :135.2  
 3rd Qu.:54.25   3rd Qu.:63.25   3rd Qu.:68.57   3rd Qu.:143.6  
 Max.   :72.00   Max.   :79.24   Max.   :81.93   Max.   :168.0  
    Volume_2         count        
 Min.   :105.3   Min.   :-12.857  
 1st Qu.:121.7   1st Qu.: -5.669  
 Median :132.2   Median : -3.909  
 Mean   :131.1   Mean   : -4.086  
 3rd Qu.:139.7   3rd Qu.: -1.998  
 Max.   :157.5   Max.   : 10.770

str(dta_l)

tibble [72 x 6] (S3: tbl_df/tbl/data.frame)
 $ Participant: int [1:72] 1 2 3 4 5 6 7 8 9 10 ...
 $ Age_1      : num [1:72] 24 21 23.9 20 38.9 ...
 $ Age_2      : num [1:72] 29.3 25.9 29.6 26 44.1 ...
 $ Volume_1   : num [1:72] 168 164 150 140 158 ...
 $ Volume_2   : num [1:72] 158 152 146 135 150 ...
 $ count      : num [1:72] -10.52 -11.69 -4.08 -4.9 -8.76 ...

# find the dta_1$count time2-time1>0的，表示有變好
better<-lapply(dta_l$count, function(x) nrow(x)>1) 
# 結果竟然找不到??
# 必須要想別的方法
which(better==TRUE)

integer(0)

不了解which(better==TRUE)為何會變成integer(0)…

#參考婉禎的做法
better<-dta_l$Volume_1 < dta_l$Volume_2
which(better==TRUE)

[1] 18 22 29 36 44 62 63

with(dta_l, plot(Age_1, Volume_1, 
                xlab="Age", 
                ylab="Brain volume", 
                pch = 20, 
                cex = 1, 
                bty='o',
                xlim=c(18, 88),
                ylim=c(100, 200),
                main="Age with Brain Volume"))

with(dta_l, points(Age_2, Volume_2, 
                xlab="Age", 
                ylab="Brain volume", 
                pch = 20, 
                cex = 1, 
                bty='o',
                xlim=c(18, 88),
                ylim=c(100, 200),
                main="Age with Brain Volume"))
with(dta_l, arrows(Age_1,
                   Volume_1,
                   Age_2,
                   Volume_2,
                   length=.08,
                   lty=1,
                   cex=0.5))

arrows(Age_1,Volume_1,Age_2,Volume_2,length=.08,lty=1,cex=0.5,col = “lightblue”))