Data

Potthoff and Roy (1964) reported data from a study of 16 boys and 11 girls who, at ages 8, 10, 12, and 14, had the distance (mm) from the center of the pituitary gland to the pterygomaxillary fissure measured. Changes in pituitary-pterygomaxillary distances during growth are important in orthodontic therapy. We consider the data from both boys and girls here.

Files in a folder

Collect the file names.

# Bare file names (no directory part) of the boys' ("m_") and girls' ("f_")
# data files.
# NOTE(review): boys are read from ./try_data but girls from ./tmp_data --
# confirm that the two different folders are intentional.
flsM <- dir("./try_data", pattern = "m_")
flsF <- dir("./tmp_data", pattern = "f_")

Give files the full path to their location.

# Prefix every file name with the folder it lives in.
# file.path() is used rather than paste0(): paste0() recycles an empty name
# vector to "" and would hand back the bare folder path when no file matched,
# whereas file.path() returns character(0) in that case.
fLM <- file.path("./try_data", flsM)
fLF <- file.path("./tmp_data", flsF)

Input multiple files

# Read each listed CSV file into a data frame; one list of data frames per sex.
ffM <- lapply(fLM, function(path) read.csv(path))
ffF <- lapply(fLF, function(path) read.csv(path))

Reduce approach

library(dplyr)
# Two-table merge helper for use with Reduce(): joins f1 and f2 on their
# shared "id" column and returns the merged data frame.
mrg2 <- function(f1, f2) {
  merged <- merge(x = f1, y = f2, by = "id")
  merged
}

# Fold each list of data frames into one wide data frame by merging on id,
# separately for boys and girls.
dtaW.M <- as.data.frame(Reduce(mrg2, ffM))
dtaW.F <- as.data.frame(Reduce(mrg2, ffF))

# Stack the girls' rows on top of the boys' rows.
dtaW.all <- rbind(dtaW.F, dtaW.M)
# Inspect the merged wide-format data frames. The lines starting with `##`
# are the console output recorded when the document was rendered.
# Boys only: 16 rows, one distance column per age.
str(dtaW.M)
## 'data.frame':    16 obs. of  5 variables:
##  $ id : int  12 13 14 15 16 17 18 19 20 21 ...
##  $ d8 : num  26 21.5 23 25.5 20 24.5 22 24 23 27.5 ...
##  $ d10: num  25 22.5 22.5 27.5 23.5 25.5 22 21.5 20.5 28 ...
##  $ d12: num  29 23 24 26.5 22.5 27 24.5 24.5 31 31 ...
##  $ d14: num  31 26.5 27.5 27 26 28.5 26.5 25.5 26 31.5 ...
# Girls only: 11 rows.
str(dtaW.F)
## 'data.frame':    11 obs. of  5 variables:
##  $ id : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ d8 : num  21 21 20.5 23.5 21.5 20 21.5 23 20 16.5 ...
##  $ d10: num  20 21.5 24 24.5 23 21 22.5 23 21 19 ...
##  $ d12: num  21.5 24 24.5 25 22.5 21 23 23.5 22 19 ...
##  $ d14: num  23 25.5 26 26.5 23.5 22.5 25 24 21.5 19.5 ...
# Girls and boys stacked: 27 rows, girls first.
str(dtaW.all)
## 'data.frame':    27 obs. of  5 variables:
##  $ id : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ d8 : num  21 21 20.5 23.5 21.5 20 21.5 23 20 16.5 ...
##  $ d10: num  20 21.5 24 24.5 23 21 22.5 23 21 19 ...
##  $ d12: num  21.5 24 24.5 25 22.5 21 23 23.5 22 19 ...
##  $ d14: num  23 25.5 26 26.5 23.5 22.5 25 24 21.5 19.5 ...
# Show the first 13 rows, which cover all 11 girls and the first two boys.
head(dtaW.all, 13)
##    id   d8  d10  d12  d14
## 1   1 21.0 20.0 21.5 23.0
## 2   2 21.0 21.5 24.0 25.5
## 3   3 20.5 24.0 24.5 26.0
## 4   4 23.5 24.5 25.0 26.5
## 5   5 21.5 23.0 22.5 23.5
## 6   6 20.0 21.0 21.0 22.5
## 7   7 21.5 22.5 23.0 25.0
## 8   8 23.0 23.0 23.5 24.0
## 9   9 20.0 21.0 22.0 21.5
## 10 10 16.5 19.0 19.0 19.5
## 11 11 24.5 25.0 28.0 28.0
## 12 12 26.0 25.0 29.0 31.0
## 13 13 21.5 22.5 23.0 26.5

Tidy approach

library(tidyverse)
# Join each list of data frames on id with reduce() and inner_join(),
# separately for boys and girls.
dtaW2.M <- as.data.frame(reduce(ffM, inner_join, by = "id"))
dtaW2.F <- as.data.frame(reduce(ffF, inner_join, by = "id"))
# Stack the girls' rows on top of the boys' rows.
dtaW2.all <- rbind(dtaW2.F, dtaW2.M)
# Inspect the combined wide data frame.
str(dtaW2.all)
## 'data.frame':    27 obs. of  5 variables:
##  $ id : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ d8 : num  21 21 20.5 23.5 21.5 20 21.5 23 20 16.5 ...
##  $ d10: num  20 21.5 24 24.5 23 21 22.5 23 21 19 ...
##  $ d12: num  21.5 24 24.5 25 22.5 21 23 23.5 22 19 ...
##  $ d14: num  23 25.5 26 26.5 23.5 22.5 25 24 21.5 19.5 ...

Vertical direction

# Convert each data frame in the lists to a matrix. The pieces carry
# differently named distance columns (d8, d10, ...), and rbind() on data
# frames requires matching column names, whereas rbind() on matrices does not.
ffM <- lapply(ffM, as.matrix)
ffF <- lapply(ffF, as.matrix)

# For boys and girls separately: stack the pieces (id and distance) with
# rbind(), then attach a 'year' column with cbind().
# The repeat counts for 'year' are taken from the row count of each piece
# instead of being hard-coded, so the labels stay aligned with the stacked rows
# if a file's row count changes.
# NOTE(review): assumes the pieces are in the order 8, 10, 12, 14 -- confirm,
# since list.files() returns names sorted alphabetically.
dtaL.F <- as.data.frame(
  cbind(
    do.call(rbind, ffF),
    year = rep(c(8, 10, 12, 14), vapply(ffF, nrow, integer(1)))
  )
)
dtaL.M <- as.data.frame(
  cbind(
    do.call(rbind, ffM),
    year = rep(c(8, 10, 12, 14), vapply(ffM, nrow, integer(1)))
  )
)

# The stacked distance column inherits the first piece's column name; give it
# a name that does not refer to a single age.
names(dtaL.F)[2] <- "pp_distance"
names(dtaL.M)[2] <- "pp_distance"

# Stack the girls' long-format rows on top of the boys'.
dtaL.all <- rbind(dtaL.F, dtaL.M)

# Inspect the combined long-format data frame.
str(dtaL.all)
## 'data.frame':    108 obs. of  3 variables:
##  $ id         : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ pp_distance: num  21 21 20.5 23.5 21.5 20 21.5 23 20 16.5 ...
##  $ year       : num  8 8 8 8 8 8 8 8 8 8 ...