#pacman是一個管理R包的工具,加載之後,採用p_load函數對包進行安裝和加載,例如 p_load(ggplot2, EBImage, jpeg, ggpubr, plotly);也就是以 pacman::p_load() 同時取代 install.packages(c("A", "B")) 和 library(A); library(B)
# require(pacman)
# Load the mice package through pacman without attaching pacman itself.
pacman::p_load(mice)
# Load the potthoffroy data set into the workspace.
data(potthoffroy)
# Show only the rows whose sex column equals "M".
subset(potthoffroy, sex == "M")
## id sex d8 d10 d12 d14
## 12 12 M 26.0 25.0 29.0 31.0
## 13 13 M 21.5 22.5 23.0 26.5
## 14 14 M 23.0 22.5 24.0 27.5
## 15 15 M 25.5 27.5 26.5 27.0
## 16 16 M 20.0 23.5 22.5 26.0
## 17 17 M 24.5 25.5 27.0 28.5
## 18 18 M 22.0 22.0 24.5 26.5
## 19 19 M 24.0 21.5 24.5 25.5
## 20 20 M 23.0 20.5 31.0 26.0
## 21 21 M 27.5 28.0 31.0 31.5
## 22 22 M 23.0 23.0 23.5 25.0
## 23 23 M 21.5 23.5 24.0 28.0
## 24 24 M 17.0 24.5 26.0 29.5
## 25 25 M 22.5 25.5 25.5 26.0
## 26 26 M 23.0 24.5 26.0 30.0
## 27 27 M 22.0 21.5 23.5 25.0
#dir.create() 建立新的資料夾(不會改變工作目錄), getwd() 檢視目前工作目錄
# 這段的意思是把 M 的資料寫入檔名以 M_ 開頭的檔案裡嗎?[, c(1, i)] 是什麼意思?(它表示保留所有列,只選取第 1 欄與第 i 欄。)
# The folder is named tmp_data and the file names contain "M_".
list.files(path = "./tmp_data/", pattern = "M_")
## [1] "M_0.csv" "M_1.csv" "M_2.csv" "M_3.csv" "M_4.csv"
# Read one of the listed files to inspect its contents.
read.csv(file = "./tmp_data/M_1.csv")
## id d8
## 1 12 26.0
## 2 13 21.5
## 3 14 23.0
## 4 15 25.5
## 5 16 20.0
## 6 17 24.5
## 7 18 22.0
## 8 19 24.0
## 9 20 23.0
## 10 21 27.5
## 11 22 23.0
## 12 23 21.5
## 13 24 17.0
## 14 25 22.5
## 15 26 23.0
## 16 27 22.0
# Names of the files in ./tmp_data whose name matches the pattern "M_".
mls <- list.files(path = "./tmp_data", pattern = "M_")
mls
## [1] "M_0.csv" "M_1.csv" "M_2.csv" "M_3.csv" "M_4.csv"
# Prefix every file name with the folder to get paths usable by read.csv().
mL <- paste0("./tmp_data/", mls)
mL
## [1] "./tmp_data/M_0.csv" "./tmp_data/M_1.csv" "./tmp_data/M_2.csv"
## [4] "./tmp_data/M_3.csv" "./tmp_data/M_4.csv"
# Input these files as a list of data frames, one element per path in mL.
mm <- lapply(mL, read.csv)
# We can merge two files; merge() joins on whichever columns the two data
# frames share. [[ ]] extracts the data frames themselves rather than
# one-element lists.
merge(mm[[1]], mm[[2]])
## id d8
## 1 12 26.0
## 2 13 21.5
## 3 14 23.0
## 4 15 25.5
## 5 16 20.0
## 6 17 24.5
## 7 18 22.0
## 8 19 24.0
## 9 20 23.0
## 10 21 27.5
## 11 22 23.0
## 12 23 21.5
## 13 24 17.0
## 14 25 22.5
## 15 26 23.0
## 16 27 22.0
# Merge all the files by repeatedly applying a two-table merge.
# Roll our own merging function: join two data frames on their id column.
mrg2 <- function(m1, m2) {
  merge(m1, m2, by = "id")
}
# Fold mrg2 over the list so every data frame in mm ends up in one table.
Reduce(mrg2, mm)
## id d8.x d8.y d10 d12 d14
## 1 12 26.0 26.0 25.0 29.0 31.0
## 2 13 21.5 21.5 22.5 23.0 26.5
## 3 14 23.0 23.0 22.5 24.0 27.5
## 4 15 25.5 25.5 27.5 26.5 27.0
## 5 16 20.0 20.0 23.5 22.5 26.0
## 6 17 24.5 24.5 25.5 27.0 28.5
## 7 18 22.0 22.0 22.0 24.5 26.5
## 8 19 24.0 24.0 21.5 24.5 25.5
## 9 20 23.0 23.0 20.5 31.0 26.0
## 10 21 27.5 27.5 28.0 31.0 31.5
## 11 22 23.0 23.0 23.0 23.5 25.0
## 12 23 21.5 21.5 23.5 24.0 28.0
## 13 24 17.0 17.0 24.5 26.0 29.5
## 14 25 22.5 22.5 25.5 25.5 26.0
## 15 26 23.0 23.0 24.5 26.0 30.0
## 16 27 22.0 22.0 21.5 23.5 25.0
# 我的理解是:把整理出來的 M 資料指定給一個物件名稱,這裡我們叫它 mm。另外,我查資料得知 tidyverse 包含了 dplyr、ggplot2 與 stringr 等資料處理常用套件;這裡我們似乎只用到 reduce() 與 inner_join(),沒有用到其他功能。
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks mice::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
# Tidyverse version of the merge above: fold inner_join() over the list mm,
# matching rows on the id column.
mm |> reduce(inner_join, by = "id")
## id d8.x d8.y d10 d12 d14
## 1 12 26.0 26.0 25.0 29.0 31.0
## 2 13 21.5 21.5 22.5 23.0 26.5
## 3 14 23.0 23.0 22.5 24.0 27.5
## 4 15 25.5 25.5 27.5 26.5 27.0
## 5 16 20.0 20.0 23.5 22.5 26.0
## 6 17 24.5 24.5 25.5 27.0 28.5
## 7 18 22.0 22.0 22.0 24.5 26.5
## 8 19 24.0 24.0 21.5 24.5 25.5
## 9 20 23.0 23.0 20.5 31.0 26.0
## 10 21 27.5 27.5 28.0 31.0 31.5
## 11 22 23.0 23.0 23.0 23.5 25.0
## 12 23 21.5 21.5 23.5 24.0 28.0
## 13 24 17.0 17.0 24.5 26.0 29.5
## 14 25 22.5 22.5 25.5 25.5 26.0
## 15 26 23.0 23.0 24.5 26.0 30.0
## 16 27 22.0 22.0 21.5 23.5 25.0
# require(pacman)
# Load the mice package through pacman without attaching pacman itself.
pacman::p_load(mice)
# Load the potthoffroy data set into the workspace.
data(potthoffroy)
# Show only the rows whose sex column equals "F".
subset(potthoffroy, sex == "F")
## id sex d8 d10 d12 d14
## 1 1 F 21.0 20.0 21.5 23.0
## 2 2 F 21.0 21.5 24.0 25.5
## 3 3 F 20.5 24.0 24.5 26.0
## 4 4 F 23.5 24.5 25.0 26.5
## 5 5 F 21.5 23.0 22.5 23.5
## 6 6 F 20.0 21.0 21.0 22.5
## 7 7 F 21.5 22.5 23.0 25.0
## 8 8 F 23.0 23.0 23.5 24.0
## 9 9 F 20.0 21.0 22.0 21.5
## 10 10 F 16.5 19.0 19.0 19.5
## 11 11 F 24.5 25.0 28.0 28.0
# Create the tmp_data folder under the current working directory; no warning
# is shown if it already exists.
dir.create(file.path(getwd(), "tmp_data"), showWarnings = FALSE)
# Write columns 3 to 6 of the sex == "F" rows to separate CSV files, each paired
# with column 1, as f_1.csv ... f_4.csv.
# The lowercase "f_" prefix matches the pattern and paths that the code below
# uses to list and read these files; an "F_" prefix would not be found by that
# pattern on a case-sensitive file system.
lapply(3:6, function(i) {
  write.csv(subset(potthoffroy, sex == "F")[, c(1, i)],
            file = paste0("./tmp_data/f_", i - 2, ".csv"),
            row.names = FALSE)
})
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
# Files in the tmp_data folder whose names contain "f_"
list.files(path = "./tmp_data/", pattern = "f_")
## [1] "f_1.csv" "f_2.csv" "f_3.csv" "f_4.csv"
# Read one of the listed files to inspect its contents.
read.csv(file = "./tmp_data/f_1.csv")
## id d8
## 1 1 21.0
## 2 2 21.0
## 3 3 20.5
## 4 4 23.5
## 5 5 21.5
## 6 6 20.0
## 7 7 21.5
## 8 8 23.0
## 9 9 20.0
## 10 10 16.5
## 11 11 24.5
# Names of the files in ./tmp_data whose name matches the pattern "f_".
fls <- list.files(path = "./tmp_data", pattern = "f_")
fls
## [1] "f_1.csv" "f_2.csv" "f_3.csv" "f_4.csv"
# Prefix every file name with the folder to get paths usable by read.csv().
fL <- paste0("./tmp_data/", fls)
fL
## [1] "./tmp_data/f_1.csv" "./tmp_data/f_2.csv" "./tmp_data/f_3.csv"
## [4] "./tmp_data/f_4.csv"
# Input multiple files
# Read these files into a list of data frames, one element per path in fL.
ff <- lapply(fL, read.csv)
# Merge
# We can merge two files; merge() joins on the columns the two data frames
# share (here id). [[ ]] extracts the data frames themselves rather than
# one-element lists.
merge(ff[[1]], ff[[2]])
## id d8 d10
## 1 1 21.0 20.0
## 2 2 21.0 21.5
## 3 3 20.5 24.0
## 4 4 23.5 24.5
## 5 5 21.5 23.0
## 6 6 20.0 21.0
## 7 7 21.5 22.5
## 8 8 23.0 23.0
## 9 9 20.0 21.0
## 10 10 16.5 19.0
## 11 11 24.5 25.0
#Reduce
# 前面的函數叫 mrg2,這裡改成 mrg3
# Roll our own merging function: join two data frames on their id column.
mrg3 <- function(f1, f2) {
  merge(f1, f2, by = "id")
}
# Fold mrg3 over the list so every data frame in ff ends up in one table.
Reduce(mrg3, ff)
## id d8 d10 d12 d14
## 1 1 21.0 20.0 21.5 23.0
## 2 2 21.0 21.5 24.0 25.5
## 3 3 20.5 24.0 24.5 26.0
## 4 4 23.5 24.5 25.0 26.5
## 5 5 21.5 23.0 22.5 23.5
## 6 6 20.0 21.0 21.0 22.5
## 7 7 21.5 22.5 23.0 25.0
## 8 8 23.0 23.0 23.5 24.0
## 9 9 20.0 21.0 22.0 21.5
## 10 10 16.5 19.0 19.0 19.5
## 11 11 24.5 25.0 28.0 28.0
library(tidyverse)
# Tidyverse version of the merge above: fold inner_join() over the list ff,
# matching rows on the id column.
ff |> reduce(inner_join, by = "id")
## id d8 d10 d12 d14
## 1 1 21.0 20.0 21.5 23.0
## 2 2 21.0 21.5 24.0 25.5
## 3 3 20.5 24.0 24.5 26.0
## 4 4 23.5 24.5 25.0 26.5
## 5 5 21.5 23.0 22.5 23.5
## 6 6 20.0 21.0 21.0 22.5
## 7 7 21.5 22.5 23.0 25.0
## 8 8 23.0 23.0 23.5 24.0
## 9 9 20.0 21.0 22.0 21.5
## 10 10 16.5 19.0 19.0 19.5
## 11 11 24.5 25.0 28.0 28.0
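# 為何結果會留下 id.1 ~ id.4 這些欄位?(因為 ff 與 mm 都是由多個 data frame 組成的 list;把 list 直接交給 merge() 時,它會先用 as.data.frame() 把 list 內的各個 data frame 橫向併排,重複的 id 欄因此被改名為 id.1、id.2……。應先把每個 list 合併成單一 data frame 再做 merge。)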
# We can combine the two groups into one table.
# ff and mm are lists of data frames, so each list is first collapsed into a
# single data frame with Reduce(merge, ...); merge() joins on the columns its
# two inputs share. Passing the lists themselves makes merge() coerce each one
# with as.data.frame(), which places the per-file data frames side by side and
# renames the repeated id columns to id.1, id.2, ... (as in the listing below,
# which was produced by the original list-based call).
# all = TRUE keeps the rows that occur in only one of the two groups.
# NOTE(review): assumes a column repeated across files (such as d8 in the M
# files) holds identical values per id - confirm.
merge(Reduce(merge, ff), Reduce(merge, mm), all = TRUE)
## id d8 id.1 d10 id.2 d12 id.3 d14 d8.1 id.4
## 1 1 21.0 1 20.0 1 21.5 1 23.0 NA NA
## 2 2 21.0 2 21.5 2 24.0 2 25.5 NA NA
## 3 3 20.5 3 24.0 3 24.5 3 26.0 NA NA
## 4 4 23.5 4 24.5 4 25.0 4 26.5 NA NA
## 5 5 21.5 5 23.0 5 22.5 5 23.5 NA NA
## 6 6 20.0 6 21.0 6 21.0 6 22.5 NA NA
## 7 7 21.5 7 22.5 7 23.0 7 25.0 NA NA
## 8 8 23.0 8 23.0 8 23.5 8 24.0 NA NA
## 9 9 20.0 9 21.0 9 22.0 9 21.5 NA NA
## 10 10 16.5 10 19.0 10 19.0 10 19.5 NA NA
## 11 11 24.5 11 25.0 11 28.0 11 28.0 NA NA
## 12 12 26.0 12 25.0 12 29.0 12 31.0 26.0 12
## 13 13 21.5 13 22.5 13 23.0 13 26.5 21.5 13
## 14 14 23.0 14 22.5 14 24.0 14 27.5 23.0 14
## 15 15 25.5 15 27.5 15 26.5 15 27.0 25.5 15
## 16 16 20.0 16 23.5 16 22.5 16 26.0 20.0 16
## 17 17 24.5 17 25.5 17 27.0 17 28.5 24.5 17
## 18 18 22.0 18 22.0 18 24.5 18 26.5 22.0 18
## 19 19 24.0 19 21.5 19 24.5 19 25.5 24.0 19
## 20 20 23.0 20 20.5 20 31.0 20 26.0 23.0 20
## 21 21 27.5 21 28.0 21 31.0 21 31.5 27.5 21
## 22 22 23.0 22 23.0 22 23.5 22 25.0 23.0 22
## 23 23 21.5 23 23.5 23 24.0 23 28.0 21.5 23
## 24 24 17.0 24 24.5 24 26.0 24 29.5 17.0 24
## 25 25 22.5 25 25.5 25 25.5 25 26.0 22.5 25
## 26 26 23.0 26 24.5 26 26.0 26 30.0 23.0 26
## 27 27 22.0 27 21.5 27 23.5 27 25.0 22.0 27