Week 4 Data-110

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.2.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs)

# Build the path to the example CSV stored in the dslabs "extdata" folder.
path <- system.file("extdata", package = "dslabs")
filename <- file.path(path, "fertility-two-countries-example.csv")

# Wide layout: one row per country, one column per year (X1960 ... X2015).
wide_data <- read.csv(filename)

# Reshape to one row per country-year, then drop the first character of each
# year label (the "X") and store the year as an integer.
new_tidy_data <- wide_data |>
  pivot_longer(X1960:X2015, names_to = "year", values_to = "fertility") |>
  mutate(year = as.integer(str_sub(year, 2)))
new_tidy_data
# A tibble: 112 × 3
   country  year fertility
   <chr>   <int>     <dbl>
 1 Germany  1960      2.41
 2 Germany  1961      2.44
 3 Germany  1962      2.47
 4 Germany  1963      2.49
 5 Germany  1964      2.49
 6 Germany  1965      2.48
 7 Germany  1966      2.44
 8 Germany  1967      2.37
 9 Germany  1968      2.28
10 Germany  1969      2.17
# ℹ 102 more rows
# Spread the tidy table back out: one column per year, filled with fertility.
new_wide_data <- pivot_wider(
  new_tidy_data,
  names_from = year,
  values_from = fertility
)
new_wide_data
# A tibble: 2 × 57
  country  `1960` `1961` `1962` `1963` `1964` `1965` `1966` `1967` `1968` `1969`
  <chr>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1 Germany    2.41   2.44   2.47   2.49   2.49   2.48   2.44   2.37   2.28   2.17
2 South K…   6.16   5.99   5.79   5.57   5.36   5.16   4.99   4.85   4.73   4.62
# ℹ 46 more variables: `1970` <dbl>, `1971` <dbl>, `1972` <dbl>, `1973` <dbl>,
#   `1974` <dbl>, `1975` <dbl>, `1976` <dbl>, `1977` <dbl>, `1978` <dbl>,
#   `1979` <dbl>, `1980` <dbl>, `1981` <dbl>, `1982` <dbl>, `1983` <dbl>,
#   `1984` <dbl>, `1985` <dbl>, `1986` <dbl>, `1987` <dbl>, `1988` <dbl>,
#   `1989` <dbl>, `1990` <dbl>, `1991` <dbl>, `1992` <dbl>, `1993` <dbl>,
#   `1994` <dbl>, `1995` <dbl>, `1996` <dbl>, `1997` <dbl>, `1998` <dbl>,
#   `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, `2002` <dbl>, `2003` <dbl>, …
# Load the murders data set and preview its first rows.
data(murders)
murders |> head()
       state abb region population total
1    Alabama  AL  South    4779736   135
2     Alaska  AK   West     710231    19
3    Arizona  AZ   West    6392017   232
4   Arkansas  AR  South    2915918    93
5 California  CA   West   37253956  1257
6   Colorado  CO   West    5029196    65
data(murders)
# data(polls_us_election_2016) also loads results_us_election_2016, used below.
data(polls_us_election_2016)

# First six rows of murders, keeping only state and population.
tab1 <- murders |>
  slice(1:6) |>
  select(state, population)

# Electoral votes for six states that only partly overlap with tab1, so the
# different join types give different results.
# Each state name must be its own straight-quoted string: with typographic
# quotes the whole list parsed as a single string, %in% matched no state, and
# tab2 came back empty.
tab2 <- results_us_election_2016 |>
  filter(
    state %in% c(
      "Alabama", "Alaska", "Arizona",
      "California", "Connecticut", "Delaware"
    )
  ) |>
  select(state, electoral_votes)

# NOTE(review): the join results printed in this document were captured while
# tab2 was empty (all NA / 0 rows); re-run the document to refresh them.
# Left join: keep every row of tab1 and add electoral_votes where state matches.
left_join(tab1, tab2, by = "state")
       state population electoral_votes
1    Alabama    4779736              NA
2     Alaska     710231              NA
3    Arizona    6392017              NA
4   Arkansas    2915918              NA
5 California   37253956              NA
6   Colorado    5029196              NA
# Right join: keep every row of tab1 (the right-hand table), tab2 columns first.
tab2 |> right_join(tab1, by = "state")
       state electoral_votes population
1    Alabama              NA    4779736
2     Alaska              NA     710231
3    Arizona              NA    6392017
4   Arkansas              NA    2915918
5 California              NA   37253956
6   Colorado              NA    5029196
# Right join: keep every row of tab2 (the right-hand table).
tab1 |> right_join(tab2, by = "state")
[1] state           population      electoral_votes
<0 rows> (or 0-length row.names)
# Inner join: keep only the states that appear in both tables.
tab1 |> inner_join(tab2, by = "state")
[1] state           population      electoral_votes
<0 rows> (or 0-length row.names)
# Full join: keep the states from either table, filling gaps with NA.
tab1 |> full_join(tab2, by = "state")
       state population electoral_votes
1    Alabama    4779736              NA
2     Alaska     710231              NA
3    Arizona    6392017              NA
4   Arkansas    2915918              NA
5 California   37253956              NA
6   Colorado    5029196              NA
# Semi join: rows of tab1 that have a match in tab2; no tab2 columns are added.
tab1 |> semi_join(tab2, by = "state")
[1] state      population
<0 rows> (or 0-length row.names)
# Anti join: rows of tab1 that have no match in tab2.
tab1 |> anti_join(tab2, by = "state")
       state population
1    Alabama    4779736
2     Alaska     710231
3    Arizona    6392017
4   Arkansas    2915918
5 California   37253956
6   Colorado    5029196