<!DOCTYPE html> Untitled

Untitled

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs)
# Locate the example CSV that ships with the dslabs package and read it in
# its wide layout (one column per year).
path <- system.file("extdata", package = "dslabs")
filename <- file.path(path, "fertility-two-countries-example.csv")
wide_data <- read.csv(filename)

# Reshape to long form: one row per country/year pair. The year columns are
# named X1960 ... X2015, so the leading "X" is dropped to leave the bare year
# (still stored as character at this point).
new_tidy_data <- wide_data |>
  pivot_longer("X1960":"X2015", names_to = "year", values_to = "fertility") |>
  mutate(year = str_sub(year, 2))
new_tidy_data
# A tibble: 112 × 3
   country year  fertility
   <chr>   <chr>     <dbl>
 1 Germany 1960       2.41
 2 Germany 1961       2.44
 3 Germany 1962       2.47
 4 Germany 1963       2.49
 5 Germany 1964       2.49
 6 Germany 1965       2.48
 7 Germany 1966       2.44
 8 Germany 1967       2.37
 9 Germany 1968       2.28
10 Germany 1969       2.17
# ℹ 102 more rows
# Store year as an integer instead of a character string.
new_tidy_data <- mutate(new_tidy_data, year = as.integer(year))
new_tidy_data
# A tibble: 112 × 3
   country  year fertility
   <chr>   <int>     <dbl>
 1 Germany  1960      2.41
 2 Germany  1961      2.44
 3 Germany  1962      2.47
 4 Germany  1963      2.49
 5 Germany  1964      2.49
 6 Germany  1965      2.48
 7 Germany  1966      2.44
 8 Germany  1967      2.37
 9 Germany  1968      2.28
10 Germany  1969      2.17
# ℹ 102 more rows
# Go back to the wide layout: one row per country, one column per year,
# with the fertility values filling the cells.
new_wide_data <- pivot_wider(
  new_tidy_data,
  names_from = year,
  values_from = fertility
)
new_wide_data
# A tibble: 2 × 57
  country  `1960` `1961` `1962` `1963` `1964` `1965` `1966` `1967` `1968` `1969`
  <chr>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1 Germany    2.41   2.44   2.47   2.49   2.49   2.48   2.44   2.37   2.28   2.17
2 South K…   6.16   5.99   5.79   5.57   5.36   5.16   4.99   4.85   4.73   4.62
# ℹ 46 more variables: `1970` <dbl>, `1971` <dbl>, `1972` <dbl>, `1973` <dbl>,
#   `1974` <dbl>, `1975` <dbl>, `1976` <dbl>, `1977` <dbl>, `1978` <dbl>,
#   `1979` <dbl>, `1980` <dbl>, `1981` <dbl>, `1982` <dbl>, `1983` <dbl>,
#   `1984` <dbl>, `1985` <dbl>, `1986` <dbl>, `1987` <dbl>, `1988` <dbl>,
#   `1989` <dbl>, `1990` <dbl>, `1991` <dbl>, `1992` <dbl>, `1993` <dbl>,
#   `1994` <dbl>, `1995` <dbl>, `1996` <dbl>, `1997` <dbl>, `1998` <dbl>,
#   `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, `2002` <dbl>, `2003` <dbl>, …
# Load the example data sets used for the join demonstrations.
# NOTE(review): results_us_election_2016 appears to be made available by
# data(polls_us_election_2016) -- confirm against the dslabs documentation.
data(murders)
data(polls_us_election_2016)

# tab1: state and population for the first six rows of murders.
tab1 <- murders |>
  slice(1:6) |>
  select(state, population)

# tab2: electoral votes for six hand-picked states.
tab2 <- results_us_election_2016 |>
  filter(
    state %in% c(
      "Alabama", "Alaska", "Arizona",
      "California", "Connecticut", "Delaware"
    )
  ) |>
  select(state, electoral_votes)

# Left join: keep every row of tab1; electoral_votes is NA where tab2 has no
# matching state.
tab1 |> left_join(tab2, by = "state")
       state population electoral_votes
1    Alabama    4779736               9
2     Alaska     710231               3
3    Arizona    6392017              11
4   Arkansas    2915918              NA
5 California   37253956              55
6   Colorado    5029196              NA
# Right join with tab1 as the right-hand table: every tab1 state is kept,
# with tab2's columns listed first.
tab2 |> right_join(tab1, by = "state")
       state electoral_votes population
1 California              55   37253956
2    Arizona              11    6392017
3    Alabama               9    4779736
4     Alaska               3     710231
5   Arkansas              NA    2915918
6   Colorado              NA    5029196
# Right join with tab2 as the right-hand table: every tab2 state is kept;
# population is NA where tab1 has no matching state.
tab1 |> right_join(tab2, by = "state")
        state population electoral_votes
1     Alabama    4779736               9
2      Alaska     710231               3
3     Arizona    6392017              11
4  California   37253956              55
5 Connecticut         NA               7
6    Delaware         NA               3
# Inner join: keep only the states that appear in both tables.
tab1 |> inner_join(tab2, by = "state")
       state population electoral_votes
1    Alabama    4779736               9
2     Alaska     710231               3
3    Arizona    6392017              11
4 California   37253956              55
# Full join: keep the states from either table, filling the missing side
# with NA.
tab1 |> full_join(tab2, by = "state")
        state population electoral_votes
1     Alabama    4779736               9
2      Alaska     710231               3
3     Arizona    6392017              11
4    Arkansas    2915918              NA
5  California   37253956              55
6    Colorado    5029196              NA
7 Connecticut         NA               7
8    Delaware         NA               3
# Semi join: keep the tab1 rows whose state also appears in tab2, without
# adding any of tab2's columns.
tab1 |> semi_join(tab2, by = "state")
       state population
1    Alabama    4779736
2     Alaska     710231
3    Arizona    6392017
4 California   37253956
# Anti join: keep the tab1 rows whose state does not appear in tab2.
tab1 |> anti_join(tab2, by = "state")
     state population
1 Arkansas    2915918
2 Colorado    5029196