library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Ten-row table to be joined: V1 holds five "a" then five "b", V2 counts 1..5
# within each V1 value, and V3 is a running integer id 101..110.
df <- data.frame(
  V1 = rep(c("a", "b"), each = 5),
  V2 = rep(1:5, times = 2),
  V3 = 101:110,
  # V1 starts out as a factor here; it is converted to character further down.
  stringsAsFactors = TRUE
)
# Two key pairs to look up in `df`: ("a", 3) and ("b", 4), each carrying
# V4 = 99. V1 is stored as a factor; V2 and V4 are doubles.
# NOTE(review): this variable shares its name with base::match(). Function
# calls still resolve to the base function, but the name is easy to misread.
match <- data.frame(
  V1 = letters[1:2],
  V2 = c(3, 4),
  V4 = rep(99, times = 2),
  stringsAsFactors = TRUE
)
# Baseline: V1 is a factor in both tables. The natural join on V1 and V2
# returns the two matching records.
inner_join(x = df, y = match)
## Joining by: c("V1", "V2")
## V1 V2 V3 V4
## 1 a 3 103 99
## 2 b 4 109 99
# Same keys, starting from `match`: each of its rows picks up the V3 value of
# the corresponding `df` row.
left_join(x = match, y = df)
## Joining by: c("V1", "V2")
## V1 V2 V4 V3
## 1 a 3 99 103
## 2 b 4 99 109
# Convert only df$V1 to character. match$V1 is still a factor, so from here on
# the V1 join key has a different type on each side.
df[["V1"]] <- as.character(df[["V1"]])
# The recorded output has only 1 record, ("a", 4), which is not a key pair in
# `match`, and no warning is given.
# NOTE(review): presumably the factor's integer codes are being compared with
# the character values -- confirm against the dplyr 0.2 join implementation.
inner_join(x = df, y = match)
## Joining by: c("V1", "V2")
## V1 V2 V3 V4
## 1 a 4 104 99
# This shows the same mismatch: ("a", 3) gets NA while ("b", 4) is paired with
# V3 = 104, the value from df's ("a", 4) row. Open question from the report:
# why is one key matched and the other not?
# NOTE(review): converting match$V1 with as.character() as well should
# presumably restore the baseline result -- confirm on the affected version.
left_join(x = match, y = df)
## Joining by: c("V1", "V2")
## V1 V2 V4 V3
## 1 a 3 99 NA
## 2 b 4 99 104
# Print the R version, platform, locale and attached/loaded packages that the
# output above was produced with.
utils::sessionInfo()
## R version 3.1.0 (2014-04-10)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dplyr_0.2
##
## loaded via a namespace (and not attached):
## [1] assertthat_0.1 digest_0.6.4 evaluate_0.5.5 formatR_0.10
## [5] htmltools_0.2.4 knitr_1.6 parallel_3.1.0 Rcpp_0.11.2
## [9] rmarkdown_0.2.49 stringr_0.6.2 tools_3.1.0