library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs)
## Warning: package 'dslabs' was built under R version 4.5.2
# Locate the example CSV that ships inside the dslabs package.
path <- system.file("extdata", package = "dslabs")
filename <- file.path(path, "fertility-two-countries-example.csv")

# In `wide_data` the year columns are named X1960, ..., X2015.
wide_data <- read.csv(filename)

# Reshape to one row per country-year, then drop the leading "X" so that
# `year` holds only the digits (it is still character at this point).
new_tidy_data <- wide_data |>
  pivot_longer(
    cols = X1960:X2015,
    names_to = "year",
    values_to = "fertility"
  ) |>
  mutate(year = str_sub(year, start = 2))
new_tidy_data
## # A tibble: 112 × 3
##    country year  fertility
##    <chr>   <chr>     <dbl>
##  1 Germany 1960       2.41
##  2 Germany 1961       2.44
##  3 Germany 1962       2.47
##  4 Germany 1963       2.49
##  5 Germany 1964       2.49
##  6 Germany 1965       2.48
##  7 Germany 1966       2.44
##  8 Germany 1967       2.37
##  9 Germany 1968       2.28
## 10 Germany 1969       2.17
## # ℹ 102 more rows
# The years were taken from column names as text; store them as integers.
new_tidy_data <- mutate(new_tidy_data, year = as.integer(year))

# Spread the long table back out: one column per year, filled with fertility.
new_wide_data <- pivot_wider(
  new_tidy_data,
  names_from = year,
  values_from = fertility
)
# Load the dslabs datasets used to build the two example tables.
data(murders)
data(polls_us_election_2016)

# tab1: state and population for the first six rows of `murders`.
tab1 <- murders |>
  slice(1:6) |>
  select(state, population)

# tab2: state and electoral votes for six hand-picked states.
# NOTE(review): `results_us_election_2016` is never loaded by name here;
# presumably dslabs provides it alongside the polls data -- confirm.
tab2 <- results_us_election_2016 |>
  filter(
    state %in% c(
      "Alabama", "Alaska", "Arizona",
      "California", "Connecticut", "Delaware"
    )
  ) |>
  select(state, electoral_votes)

# Types of joins ----

# left_join: keep every row of tab1; states missing from tab2 get NA.
tab1 |> left_join(tab2, by = "state")
##        state population electoral_votes
## 1    Alabama    4779736               9
## 2     Alaska     710231               3
## 3    Arizona    6392017              11
## 4   Arkansas    2915918              NA
## 5 California   37253956              55
## 6   Colorado    5029196              NA
# right_join with the tables swapped: still keeps every row of tab1, but the
# column order and row order follow tab2 first.
tab2 |> right_join(tab1, by = "state")
##        state electoral_votes population
## 1 California              55   37253956
## 2    Arizona              11    6392017
## 3    Alabama               9    4779736
## 4     Alaska               3     710231
## 5   Arkansas              NA    2915918
## 6   Colorado              NA    5029196
# right_join: keep every row of tab2; states missing from tab1 get NA.
tab1 |> right_join(tab2, by = "state")
##         state population electoral_votes
## 1     Alabama    4779736               9
## 2      Alaska     710231               3
## 3     Arizona    6392017              11
## 4  California   37253956              55
## 5 Connecticut         NA               7
## 6    Delaware         NA               3
# inner_join: keep only the states present in both tables.
tab1 |> inner_join(tab2, by = "state")
##        state population electoral_votes
## 1    Alabama    4779736               9
## 2     Alaska     710231               3
## 3    Arizona    6392017              11
## 4 California   37253956              55
# full_join: keep the states from either table, filling gaps with NA.
tab1 |> full_join(tab2, by = "state")
##         state population electoral_votes
## 1     Alabama    4779736               9
## 2      Alaska     710231               3
## 3     Arizona    6392017              11
## 4    Arkansas    2915918              NA
## 5  California   37253956              55
## 6    Colorado    5029196              NA
## 7 Connecticut         NA               7
## 8    Delaware         NA               3
# semi_join: tab1 rows that have a match in tab2; no tab2 columns are added.
tab1 |> semi_join(tab2, by = "state")
##        state population
## 1    Alabama    4779736
## 2     Alaska     710231
## 3    Arizona    6392017
## 4 California   37253956
# anti_join: tab1 rows that have no match in tab2.
tab1 |> anti_join(tab2, by = "state")
##      state population
## 1 Arkansas    2915918
## 2 Colorado    5029196