Merging Data Frames

Sean Wen

21st January 2019

Prologue

Concatanate entire data frames

# Create dataset no. 1
Name <- c("Alice", "Barbara", "Cathy", "Dunn")
Weight <- round(runif(4, 50, 90), digits=0)
Height <- round(runif(4, 150, 190), digits=0)
df1 <- data.frame(Name, Weight, Height, stringsAsFactors=FALSE)
print(df1)
##      Name Weight Height
## 1   Alice     70    155
## 2 Barbara     82    167
## 3   Cathy     50    169
## 4    Dunn     71    154
# Create dataset no. 2
Name <- c("Alice", "Barbara", "Cathy", "Dunn")
Horoscope <- c("Gemini", "Cancer", "Taurus", "Libra")
Country <- c("United Kingdom", "Korea", "Istanbul", "Indonesia")
df2 <- data.frame(Name, Horoscope, Country, stringsAsFactors=FALSE)
print(df2)
##      Name Horoscope        Country
## 1   Alice    Gemini United Kingdom
## 2 Barbara    Cancer          Korea
## 3   Cathy    Taurus       Istanbul
## 4    Dunn     Libra      Indonesia
# Concatanate data frames
df.merged <- cbind(df1, df2)
print(df.merged)
##      Name Weight Height    Name Horoscope        Country
## 1   Alice     70    155   Alice    Gemini United Kingdom
## 2 Barbara     82    167 Barbara    Cancer          Korea
## 3   Cathy     50    169   Cathy    Taurus       Istanbul
## 4    Dunn     71    154    Dunn     Libra      Indonesia

Append entire data frames

# Create dataset no. 1
Name <- c("Alice", "Barbara", "Cathy", "Dunn")
Horoscope <- c("Gemini", "Cancer", "Taurus", "Libra")
Country <- c("United Kingdom", "Korea", "Istanbul", "Indonesia")
df1 <- data.frame(Name, Horoscope, Country, stringsAsFactors=FALSE)
print(df1)
##      Name Horoscope        Country
## 1   Alice    Gemini United Kingdom
## 2 Barbara    Cancer          Korea
## 3   Cathy    Taurus       Istanbul
## 4    Dunn     Libra      Indonesia
# Create dataset no. 2
Name <- c("Taylor", "Ben", "Anne", "Joseph")
Horoscope <- c("Aries", "Sagittarius", "Scorpio", "Capricorn")
Country <- c("USA", "Japan", "China", "Spain")
df2 <- data.frame(Name, Horoscope, Country, stringsAsFactors=FALSE)
print(df2)
##     Name   Horoscope Country
## 1 Taylor       Aries     USA
## 2    Ben Sagittarius   Japan
## 3   Anne     Scorpio   China
## 4 Joseph   Capricorn   Spain
# Append data frames
df.merged <- rbind(df1, df2)
print(df.merged)
##      Name   Horoscope        Country
## 1   Alice      Gemini United Kingdom
## 2 Barbara      Cancer          Korea
## 3   Cathy      Taurus       Istanbul
## 4    Dunn       Libra      Indonesia
## 5  Taylor       Aries            USA
## 6     Ben Sagittarius          Japan
## 7    Anne     Scorpio          China
## 8  Joseph   Capricorn          Spain

Merging data frames

# Create dataset no. 1
Name <- c("Cathy", "Alice", "Dunn", "Barbara", "Yale")
Weight <- round(runif(5, 50, 90), digits=0)
Height <- round(runif(5, 150, 190), digits=0)
df1 <- data.frame(Name, Weight, Height, stringsAsFactors=FALSE)
print(df1)
##      Name Weight Height
## 1   Cathy     55    185
## 2   Alice     61    180
## 3    Dunn     54    188
## 4 Barbara     60    159
## 5    Yale     88    180
# Create dataset no. 2
Name <- c("Barbara", "Cathy", "Alice", "Dunn", "Michael")
Horoscope <- c("Gemini", "Cancer", "Taurus", "Libra", "Aries")
Country <- c("United Kingdom", "Korea", "Istanbul", "Indonesia", "Canada")
df2 <- data.frame(Name, Horoscope, Country, stringsAsFactors=FALSE)
print(df2)
##      Name Horoscope        Country
## 1 Barbara    Gemini United Kingdom
## 2   Cathy    Cancer          Korea
## 3   Alice    Taurus       Istanbul
## 4    Dunn     Libra      Indonesia
## 5 Michael     Aries         Canada
# Merge data frame
df.merged <- merge(x=df1, y=df2, by="Name")
print(df.merged)
##      Name Weight Height Horoscope        Country
## 1   Alice     61    180    Taurus       Istanbul
## 2 Barbara     60    159    Gemini United Kingdom
## 3   Cathy     55    185    Cancer          Korea
## 4    Dunn     54    188     Libra      Indonesia
df.merged <- merge(x=df1, y=df2, by="Name", all=TRUE)
print(df.merged)
##      Name Weight Height Horoscope        Country
## 1   Alice     61    180    Taurus       Istanbul
## 2 Barbara     60    159    Gemini United Kingdom
## 3   Cathy     55    185    Cancer          Korea
## 4    Dunn     54    188     Libra      Indonesia
## 5 Michael     NA     NA     Aries         Canada
## 6    Yale     88    180      <NA>           <NA>
df.merged <- merge(x=df1, y=df2, by="Name", all.x=TRUE)
print(df.merged)
##      Name Weight Height Horoscope        Country
## 1   Alice     61    180    Taurus       Istanbul
## 2 Barbara     60    159    Gemini United Kingdom
## 3   Cathy     55    185    Cancer          Korea
## 4    Dunn     54    188     Libra      Indonesia
## 5    Yale     88    180      <NA>           <NA>
df.merged <- merge(x=df1, y=df2, by="Name", all.y=TRUE)
print(df.merged)
##      Name Weight Height Horoscope        Country
## 1   Alice     61    180    Taurus       Istanbul
## 2 Barbara     60    159    Gemini United Kingdom
## 3   Cathy     55    185    Cancer          Korea
## 4    Dunn     54    188     Libra      Indonesia
## 5 Michael     NA     NA     Aries         Canada

Merging data frames

Name <- c("Cathy", "Alice", "Dunn", "Barbara", "Yale")
Weight <- round(runif(5, 50, 90), digits=0)
Height <- round(runif(5, 150, 190), digits=0)
df1 <- data.frame(Name, Weight, Height, stringsAsFactors=FALSE)
print(df1)
##      Name Weight Height
## 1   Cathy     64    162
## 2   Alice     61    158
## 3    Dunn     55    169
## 4 Barbara     69    156
## 5    Yale     60    154
# Create dataset no. 2
Name <- c("Barbara", "Barbara", "Cathy", "Alice", "Dunn", "Michael")
Colour <- c("Red", "Green", "Blue", "Yellow", "Purple", "Pink")
df2 <- data.frame(Name, Colour, stringsAsFactors=FALSE)
print(df2)
##      Name Colour
## 1 Barbara    Red
## 2 Barbara  Green
## 3   Cathy   Blue
## 4   Alice Yellow
## 5    Dunn Purple
## 6 Michael   Pink
# Merge data frame 
df.merged <- merge(x=df1, y=df2, by="Name", all.x=TRUE)
print(df.merged)
##      Name Weight Height Colour
## 1   Alice     61    158 Yellow
## 2 Barbara     69    156    Red
## 3 Barbara     69    156  Green
## 4   Cathy     64    162   Blue
## 5    Dunn     55    169 Purple
## 6    Yale     60    154   <NA>

Merging data frames

Name <- c("Cathy", "Alice", "Dunn", "Barbara", "Yale")
Weight <- round(runif(5, 50, 90), digits=0)
Height <- round(runif(5, 150, 190), digits=0)
df1 <- data.frame(Name, Weight, Height, stringsAsFactors=FALSE)
print(df1)
##      Name Weight Height
## 1   Cathy     80    177
## 2   Alice     73    169
## 3    Dunn     84    157
## 4 Barbara     73    181
## 5    Yale     70    180
# Create dataset no. 2
FirstName <- c("Barbara", "Cathy", "Alice", "Dunn", "Michael")
Colour <- c("Red", "Blue", "Yellow", "Purple", "Pink")
df2 <- data.frame(FirstName, Colour, stringsAsFactors=FALSE)
print(df2)
##   FirstName Colour
## 1   Barbara    Red
## 2     Cathy   Blue
## 3     Alice Yellow
## 4      Dunn Purple
## 5   Michael   Pink
# Merge data frame
df.merged <- merge(x=df1, y=df2, by.x="Name", by.y="FirstName", all.x=TRUE)
print(df.merged)
##      Name Weight Height Colour
## 1   Alice     73    169 Yellow
## 2 Barbara     73    181    Red
## 3   Cathy     80    177   Blue
## 4    Dunn     84    157 Purple
## 5    Yale     70    180   <NA>

Merging data frames

library(plyr)

Joining data frames

Name <- c("Cathy", "Alice", "Dunn", "Barbara", "Yale")
Weight <- round(runif(5, 50, 90), digits=0)
Height <- round(runif(5, 150, 190), digits=0)
df1 <- data.frame(Name, Weight, Height, stringsAsFactors=FALSE)
print(df1)
##      Name Weight Height
## 1   Cathy     82    152
## 2   Alice     58    163
## 3    Dunn     72    150
## 4 Barbara     77    163
## 5    Yale     76    176
# Create dataset no. 2
Name <- c("Barbara", "Cathy", "Alice", "Dunn", "Michael")
Horoscope <- c("Gemini", "Cancer", "Taurus", "Libra", "Aries")
Country <- c("United Kingdom", "Korea", "Istanbul", "Indonesia", "Canada")
df2 <- data.frame(Name, Horoscope, Country, stringsAsFactors=FALSE)
print(df2)
##      Name Horoscope        Country
## 1 Barbara    Gemini United Kingdom
## 2   Cathy    Cancer          Korea
## 3   Alice    Taurus       Istanbul
## 4    Dunn     Libra      Indonesia
## 5 Michael     Aries         Canada
# Merge data frame
df.merged <- join(x=df1, y=df2, by="Name")
print(df.merged)
##      Name Weight Height Horoscope        Country
## 1   Cathy     82    152    Cancer          Korea
## 2   Alice     58    163    Taurus       Istanbul
## 3    Dunn     72    150     Libra      Indonesia
## 4 Barbara     77    163    Gemini United Kingdom
## 5    Yale     76    176      <NA>           <NA>

Joining data frames

df.merged <- join(x=df1, y=df2, by="Name", type="full")
print(df.merged)
##      Name Weight Height Horoscope        Country
## 1   Cathy     82    152    Cancer          Korea
## 2   Alice     58    163    Taurus       Istanbul
## 3    Dunn     72    150     Libra      Indonesia
## 4 Barbara     77    163    Gemini United Kingdom
## 5    Yale     76    176      <NA>           <NA>
## 6 Michael     NA     NA     Aries         Canada
df.merged <- join(x=df1, y=df2, by="Name", type="inner")
print(df.merged)
##      Name Weight Height Horoscope        Country
## 1   Cathy     82    152    Cancer          Korea
## 2   Alice     58    163    Taurus       Istanbul
## 3    Dunn     72    150     Libra      Indonesia
## 4 Barbara     77    163    Gemini United Kingdom
df.merged <- join(x=df1, y=df2, by="Name", type="right")
print(df.merged)
##      Name Weight Height Horoscope        Country
## 1 Barbara     77    163    Gemini United Kingdom
## 2   Cathy     82    152    Cancer          Korea
## 3   Alice     58    163    Taurus       Istanbul
## 4    Dunn     72    150     Libra      Indonesia
## 5 Michael     NA     NA     Aries         Canada

Joining data frames

Name <- c("Cathy", "Alice", "Dunn", "Barbara", "Yale")
Weight <- round(runif(5, 50, 90), digits=0)
Height <- round(runif(5, 150, 190), digits=0)
df1 <- data.frame(Name, Weight, Height, stringsAsFactors=FALSE)
print(df1)
##      Name Weight Height
## 1   Cathy     79    182
## 2   Alice     59    174
## 3    Dunn     81    177
## 4 Barbara     57    188
## 5    Yale     78    185
# Create dataset no. 2
Name <- c("Barbara", "Barbara", "Cathy", "Alice", "Dunn", "Michael")
Colour <- c("Red", "Green", "Blue", "Yellow", "Purple", "Pink")
df2 <- data.frame(Name, Colour, stringsAsFactors=FALSE)
print(df2)
##      Name Colour
## 1 Barbara    Red
## 2 Barbara  Green
## 3   Cathy   Blue
## 4   Alice Yellow
## 5    Dunn Purple
## 6 Michael   Pink
# Merge data frames 
df.merged <- join(x=df1, y=df2, by="Name")
print(df.merged)
##      Name Weight Height Colour
## 1   Cathy     79    182   Blue
## 2   Alice     59    174 Yellow
## 3    Dunn     81    177 Purple
## 4 Barbara     57    188    Red
## 5 Barbara     57    188  Green
## 6    Yale     78    185   <NA>

Joining data frames

# Merge data frames
df.merged <- join(x=df1, y=df2, by="Name", match="first")
print(df.merged)
##      Name Weight Height Colour
## 1   Cathy     79    182   Blue
## 2   Alice     59    174 Yellow
## 3    Dunn     81    177 Purple
## 4 Barbara     57    188    Red
## 5    Yale     78    185   <NA>

Joining data frames

Summary