Class 5

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.1     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.3     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dslabs)
path<-system.file("extdata", package="dslabs")
filename<-("C:/Users/24680/AppData/Local/R/win-library/4.6/dslabs/extdata/fertility-two-countries-example.csv")
wide_data<-read.csv(filename)
head(wide_data)
      country X1960 X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969 X1970
1     Germany  2.41  2.44  2.47  2.49  2.49  2.48  2.44  2.37  2.28  2.17  2.04
2 South Korea  6.16  5.99  5.79  5.57  5.36  5.16  4.99  4.85  4.73  4.62  4.53
  X1971 X1972 X1973 X1974 X1975 X1976 X1977 X1978 X1979 X1980 X1981 X1982 X1983
1  1.92  1.80  1.70  1.62  1.56  1.53  1.50  1.49  1.48  1.47  1.47  1.46  1.46
2  4.41  4.27  4.09  3.87  3.62  3.36  3.11  2.88  2.69  2.52  2.38  2.24  2.11
  X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995 X1996
1  1.46  1.45  1.44  1.43  1.41  1.38  1.36  1.34  1.32  1.31  1.31  1.31  1.32
2  1.98  1.86  1.75  1.67  1.63  1.61  1.61  1.63  1.65  1.66  1.65  1.63  1.59
  X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
1  1.33  1.34  1.35  1.35  1.35  1.35  1.35  1.35  1.35  1.36  1.36  1.37  1.38
2  1.54  1.48  1.41  1.35  1.30  1.25  1.22  1.20  1.20  1.20  1.21  1.23  1.25
  X2010 X2011 X2012 X2013 X2014 X2015
1  1.39  1.40  1.41  1.42  1.43  1.44
2  1.27  1.29  1.30  1.32  1.34  1.36
library(tidyverse)
library(dslabs)
path<-system.file("extdata", package="dslabs")
filename<-("C:/Users/24680/AppData/Local/R/win-library/4.6/dslabs/extdata/fertility-two-countries-example.csv")
wide_data<-read.csv(filename)
head(wide_data)
      country X1960 X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969 X1970
1     Germany  2.41  2.44  2.47  2.49  2.49  2.48  2.44  2.37  2.28  2.17  2.04
2 South Korea  6.16  5.99  5.79  5.57  5.36  5.16  4.99  4.85  4.73  4.62  4.53
  X1971 X1972 X1973 X1974 X1975 X1976 X1977 X1978 X1979 X1980 X1981 X1982 X1983
1  1.92  1.80  1.70  1.62  1.56  1.53  1.50  1.49  1.48  1.47  1.47  1.46  1.46
2  4.41  4.27  4.09  3.87  3.62  3.36  3.11  2.88  2.69  2.52  2.38  2.24  2.11
  X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995 X1996
1  1.46  1.45  1.44  1.43  1.41  1.38  1.36  1.34  1.32  1.31  1.31  1.31  1.32
2  1.98  1.86  1.75  1.67  1.63  1.61  1.61  1.63  1.65  1.66  1.65  1.63  1.59
  X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
1  1.33  1.34  1.35  1.35  1.35  1.35  1.35  1.35  1.35  1.36  1.36  1.37  1.38
2  1.54  1.48  1.41  1.35  1.30  1.25  1.22  1.20  1.20  1.20  1.21  1.23  1.25
  X2010 X2011 X2012 X2013 X2014 X2015
1  1.39  1.40  1.41  1.42  1.43  1.44
2  1.27  1.29  1.30  1.32  1.34  1.36
new_tidy_data<-pivot_longer(wide_data,`X1960`:`X2015`, names_to = "year", values_to = "fertility")

new_tidy_data$year<=str_sub(new_tidy_data$year,2)
  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[109] FALSE FALSE FALSE FALSE
new_tidy_data
# A tibble: 112 × 3
   country year  fertility
   <chr>   <chr>     <dbl>
 1 Germany X1960      2.41
 2 Germany X1961      2.44
 3 Germany X1962      2.47
 4 Germany X1963      2.49
 5 Germany X1964      2.49
 6 Germany X1965      2.48
 7 Germany X1966      2.44
 8 Germany X1967      2.37
 9 Germany X1968      2.28
10 Germany X1969      2.17
# ℹ 102 more rows
new_tidy_data<-new_tidy_data|>
  mutate(year=as.integer(year))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `year = as.integer(year)`.
Caused by warning:
! NAs introduced by coercion
new_wide_data<-new_tidy_data %>% pivot_wider(names_from = year, values_from= fertility)
Warning: Values from `fertility` are not uniquely identified; output will contain
list-cols.
• Use `values_fn = list` to suppress this warning.
• Use `values_fn = {summary_fun}` to summarise duplicates.
• Use the following dplyr code to identify duplicates.
  {data} |>
  dplyr::summarise(n = dplyr::n(), .by = c(country, year)) |>
  dplyr::filter(n > 1L)
data(murders)
data("polls_us_election_2016")
tab1<-slice(murders, 1:6)%>% select(state, population)
tab1
       state population
1    Alabama    4779736
2     Alaska     710231
3    Arizona    6392017
4   Arkansas    2915918
5 California   37253956
6   Colorado    5029196
tab2<-results_us_election_2016%>% filter(state %in% c("Alabama", "Alaska", "Arizona", "California", "Conneticut", "Deleware"))%>% select(state, electoral_votes)
tab2
       state electoral_votes
1 California              55
2    Arizona              11
3    Alabama               9
4     Alaska               3
left_join(tab1, tab2, by="state")
       state population electoral_votes
1    Alabama    4779736               9
2     Alaska     710231               3
3    Arizona    6392017              11
4   Arkansas    2915918              NA
5 California   37253956              55
6   Colorado    5029196              NA
right_join(tab2, tab1, by="state")
       state electoral_votes population
1 California              55   37253956
2    Arizona              11    6392017
3    Alabama               9    4779736
4     Alaska               3     710231
5   Arkansas              NA    2915918
6   Colorado              NA    5029196
right_join(tab1, tab2, by="state")
       state population electoral_votes
1    Alabama    4779736               9
2     Alaska     710231               3
3    Arizona    6392017              11
4 California   37253956              55
inner_join(tab1, tab2, by="state")
       state population electoral_votes
1    Alabama    4779736               9
2     Alaska     710231               3
3    Arizona    6392017              11
4 California   37253956              55
full_join(tab1, tab2, by="state")
       state population electoral_votes
1    Alabama    4779736               9
2     Alaska     710231               3
3    Arizona    6392017              11
4   Arkansas    2915918              NA
5 California   37253956              55
6   Colorado    5029196              NA
semi_join(tab1, tab2, by="state")
       state population
1    Alabama    4779736
2     Alaska     710231
3    Arizona    6392017
4 California   37253956
anti_join(tab1, tab2, by="state")
     state population
1 Arkansas    2915918
2 Colorado    5029196