library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs)
## Warning: package 'dslabs' was built under R version 4.5.2
# Locate the example CSV that ships with the dslabs package.
path <- system.file("extdata", package = "dslabs")
filename <- file.path(path, "fertility-two-countries-example.csv")

# read.csv() makes column names syntactically valid, so the year columns
# arrive as X1960 ... X2015.
wide_data <- read.csv(filename)

# Reshape to one row per country-year. names_prefix removes the leading "X"
# as part of the pivot (and only when it is actually present), so `year`
# holds "1960", "1961", ... -- still character at this point.
new_tidy_data <- pivot_longer(
  wide_data,
  X1960:X2015,
  names_to = "year",
  names_prefix = "X",
  values_to = "fertility"
)
new_tidy_data
## # A tibble: 112 × 3
## country year fertility
## <chr> <chr> <dbl>
## 1 Germany 1960 2.41
## 2 Germany 1961 2.44
## 3 Germany 1962 2.47
## 4 Germany 1963 2.49
## 5 Germany 1964 2.49
## 6 Germany 1965 2.48
## 7 Germany 1966 2.44
## 8 Germany 1967 2.37
## 9 Germany 1968 2.28
## 10 Germany 1969 2.17
## # ℹ 102 more rows
# `year` was derived from column names and is text; store it as integer.
new_tidy_data <- mutate(new_tidy_data, year = as.integer(year))

# Spread back out: one column per year, filled with the fertility values.
new_wide_data <- pivot_wider(
  new_tidy_data,
  names_from = year,
  values_from = fertility
)
data(murders)
# NOTE(review): results_us_election_2016 is used below without its own data()
# call; presumably it is loaded together with polls_us_election_2016 -- confirm.
data(polls_us_election_2016)

# tab1: state and population for the first six rows of murders.
tab1 <- murders |>
  slice(1:6) |>
  select(state, population)

# tab2: electoral votes for six named states; the list deliberately overlaps
# tab1 only partially so that the join examples differ from one another.
tab2 <- results_us_election_2016 |>
  filter(
    state %in% c(
      "Alabama", "Alaska", "Arizona",
      "California", "Connecticut", "Delaware"
    )
  ) |>
  select(state, electoral_votes)
# Types of joining ----
# Left join: keep every row of tab1; electoral_votes is NA where tab2 has no
# matching state.
tab1 |> left_join(tab2, by = "state")
## state population electoral_votes
## 1 Alabama 4779736 9
## 2 Alaska 710231 3
## 3 Arizona 6392017 11
## 4 Arkansas 2915918 NA
## 5 California 37253956 55
## 6 Colorado 5029196 NA
# Right join with the tables swapped: still keeps every row of tab1, but the
# tab2 column comes first and the rows are ordered differently.
tab2 |> right_join(tab1, by = "state")
## state electoral_votes population
## 1 California 55 37253956
## 2 Arizona 11 6392017
## 3 Alabama 9 4779736
## 4 Alaska 3 710231
## 5 Arkansas NA 2915918
## 6 Colorado NA 5029196
# Right join: keep every row of tab2; population is NA where tab1 has no
# matching state.
tab1 |> right_join(tab2, by = "state")
## state population electoral_votes
## 1 Alabama 4779736 9
## 2 Alaska 710231 3
## 3 Arizona 6392017 11
## 4 California 37253956 55
## 5 Connecticut NA 7
## 6 Delaware NA 3
# Inner join: keep only the states present in both tables.
tab1 |> inner_join(tab2, by = "state")
## state population electoral_votes
## 1 Alabama 4779736 9
## 2 Alaska 710231 3
## 3 Arizona 6392017 11
## 4 California 37253956 55
# Full join: keep the states from either table, filling the gaps with NA.
tab1 |> full_join(tab2, by = "state")
## state population electoral_votes
## 1 Alabama 4779736 9
## 2 Alaska 710231 3
## 3 Arizona 6392017 11
## 4 Arkansas 2915918 NA
## 5 California 37253956 55
## 6 Colorado 5029196 NA
## 7 Connecticut NA 7
## 8 Delaware NA 3
# Semi join: rows of tab1 that have a match in tab2; no tab2 columns are added.
tab1 |> semi_join(tab2, by = "state")
## state population
## 1 Alabama 4779736
## 2 Alaska 710231
## 3 Arizona 6392017
## 4 California 37253956
# Anti join: rows of tab1 that have no match in tab2.
tab1 |> anti_join(tab2, by = "state")
## state population
## 1 Arkansas 2915918
## 2 Colorado 5029196