Week 4

library(tidyverse) 
Warning: package 'tidyverse' was built under R version 4.5.2
Warning: package 'ggplot2' was built under R version 4.5.2
Warning: package 'tibble' was built under R version 4.5.2
Warning: package 'tidyr' was built under R version 4.5.2
Warning: package 'readr' was built under R version 4.5.2
Warning: package 'purrr' was built under R version 4.5.2
Warning: package 'dplyr' was built under R version 4.5.2
Warning: package 'stringr' was built under R version 4.5.2
Warning: package 'forcats' was built under R version 4.5.2
Warning: package 'lubridate' was built under R version 4.5.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs) 
Warning: package 'dslabs' was built under R version 4.5.2
# Locate the example CSV inside the "extdata" folder of the dslabs package.
path <- system.file("extdata", package = "dslabs")
filename <- file.path(path, "fertility-two-countries-example.csv")

# Read the wide table: one row per country, one column per year
# (the columns come back named X1960 ... X2015).
wide_data <- read.csv(file = filename)
wide_data
      country X1960 X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969 X1970
1     Germany  2.41  2.44  2.47  2.49  2.49  2.48  2.44  2.37  2.28  2.17  2.04
2 South Korea  6.16  5.99  5.79  5.57  5.36  5.16  4.99  4.85  4.73  4.62  4.53
  X1971 X1972 X1973 X1974 X1975 X1976 X1977 X1978 X1979 X1980 X1981 X1982 X1983
1  1.92  1.80  1.70  1.62  1.56  1.53  1.50  1.49  1.48  1.47  1.47  1.46  1.46
2  4.41  4.27  4.09  3.87  3.62  3.36  3.11  2.88  2.69  2.52  2.38  2.24  2.11
  X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995 X1996
1  1.46  1.45  1.44  1.43  1.41  1.38  1.36  1.34  1.32  1.31  1.31  1.31  1.32
2  1.98  1.86  1.75  1.67  1.63  1.61  1.61  1.63  1.65  1.66  1.65  1.63  1.59
  X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
1  1.33  1.34  1.35  1.35  1.35  1.35  1.35  1.35  1.35  1.36  1.36  1.37  1.38
2  1.54  1.48  1.41  1.35  1.30  1.25  1.22  1.20  1.20  1.20  1.21  1.23  1.25
  X2010 X2011 X2012 X2013 X2014 X2015
1  1.39  1.40  1.41  1.42  1.43  1.44
2  1.27  1.29  1.30  1.32  1.34  1.36
# `filename` was already built from `path` earlier in this script and neither
# has changed since, so it is only displayed here rather than recomputed.
filename
[1] "C:/Users/mmaliha/AppData/Local/R/win-library/4.5/dslabs/extdata/fertility-two-countries-example.csv"
# Reshape from one column per year to one row per country-year: the year
# column names go into `year` and the cell values into `fertility`.
new_tidy_data <- wide_data |>
  pivot_longer(
    cols = X1960:X2015,
    names_to = "year",
    values_to = "fertility"
  )
new_tidy_data
# A tibble: 112 × 3
   country year  fertility
   <chr>   <chr>     <dbl>
 1 Germany X1960      2.41
 2 Germany X1961      2.44
 3 Germany X1962      2.47
 4 Germany X1963      2.49
 5 Germany X1964      2.49
 6 Germany X1965      2.48
 7 Germany X1966      2.44
 8 Germany X1967      2.37
 9 Germany X1968      2.28
10 Germany X1969      2.17
# ℹ 102 more rows
# Drop the first character of each year label (the "X" prefix) by keeping
# everything from position 2 onward; `year` is still a character column.
new_tidy_data <- new_tidy_data |>
  mutate(year = str_sub(year, start = 2))
new_tidy_data
# A tibble: 112 × 3
   country year  fertility
   <chr>   <chr>     <dbl>
 1 Germany 1960       2.41
 2 Germany 1961       2.44
 3 Germany 1962       2.47
 4 Germany 1963       2.49
 5 Germany 1964       2.49
 6 Germany 1965       2.48
 7 Germany 1966       2.44
 8 Germany 1967       2.37
 9 Germany 1968       2.28
10 Germany 1969       2.17
# ℹ 102 more rows
# Convert the year labels from character to integer.
new_tidy_data$year <- as.integer(new_tidy_data$year)
new_tidy_data
# A tibble: 112 × 3
   country  year fertility
   <chr>   <int>     <dbl>
 1 Germany  1960      2.41
 2 Germany  1961      2.44
 3 Germany  1962      2.47
 4 Germany  1963      2.49
 5 Germany  1964      2.49
 6 Germany  1965      2.48
 7 Germany  1966      2.44
 8 Germany  1967      2.37
 9 Germany  1968      2.28
10 Germany  1969      2.17
# ℹ 102 more rows
# Load the example datasets.
data(murders)

# NOTE(review): `results_us_election_2016` is used below but is never loaded by
# name; presumably it arrives together with `polls_us_election_2016` - confirm.
data(polls_us_election_2016)

# State and population for the first six rows of `murders`.
tab1 <- murders |>
  slice(1:6) |>
  select(state, population)

# Electoral votes for six hand-picked states; this set only partly overlaps
# the states held in tab1.
tab2 <- results_us_election_2016 |>
  filter(
    state %in% c(
      "Alabama", "Alaska", "Arizona",
      "California", "Connecticut", "Delaware"
    )
  ) |>
  select(state, electoral_votes)

# Show tab1: state and population for the first six rows of `murders`.
tab1
       state population
1    Alabama    4779736
2     Alaska     710231
3    Arizona    6392017
4   Arkansas    2915918
5 California   37253956
6   Colorado    5029196
# Show tab2: state and electoral votes for the six states filtered above.
tab2
        state electoral_votes
1  California              55
2     Arizona              11
3     Alabama               9
4 Connecticut               7
5      Alaska               3
6    Delaware               3
# Keep every row of tab1 and attach electoral_votes from tab2 where `state`
# matches; states that are absent from tab2 get NA.
tab1 |>
  left_join(tab2, by = join_by(state))
       state population electoral_votes
1    Alabama    4779736               9
2     Alaska     710231               3
3    Arizona    6392017              11
4   Arkansas    2915918              NA
5 California   37253956              55
6   Colorado    5029196              NA