week 3

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.3     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dslabs)

# Directory holding the raw example files bundled with the dslabs package.
path <- system.file("extdata", package = "dslabs")

# Full path to the two-country fertility example; printed for inspection.
filename <- file.path(path, "fertility-two-countries-example.csv")
filename

[1] "/Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/library/dslabs/extdata/fertility-two-countries-example.csv"

# Read the wide-format file with base R's CSV reader; the result has one
# row per country and one column per year.
wide_data <- read.csv(file = filename)
head(wide_data, n = 6L)

      country X1960 X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969 X1970
1     Germany  2.41  2.44  2.47  2.49  2.49  2.48  2.44  2.37  2.28  2.17  2.04
2 South Korea  6.16  5.99  5.79  5.57  5.36  5.16  4.99  4.85  4.73  4.62  4.53
  X1971 X1972 X1973 X1974 X1975 X1976 X1977 X1978 X1979 X1980 X1981 X1982 X1983
1  1.92  1.80  1.70  1.62  1.56  1.53  1.50  1.49  1.48  1.47  1.47  1.46  1.46
2  4.41  4.27  4.09  3.87  3.62  3.36  3.11  2.88  2.69  2.52  2.38  2.24  2.11
  X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995 X1996
1  1.46  1.45  1.44  1.43  1.41  1.38  1.36  1.34  1.32  1.31  1.31  1.31  1.32
2  1.98  1.86  1.75  1.67  1.63  1.61  1.61  1.63  1.65  1.66  1.65  1.63  1.59
  X1997 X1998 X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
1  1.33  1.34  1.35  1.35  1.35  1.35  1.35  1.35  1.35  1.36  1.36  1.37  1.38
2  1.54  1.48  1.41  1.35  1.30  1.25  1.22  1.20  1.20  1.20  1.21  1.23  1.25
  X2010 X2011 X2012 X2013 X2014 X2015
1  1.39  1.40  1.41  1.42  1.43  1.44
2  1.27  1.29  1.30  1.32  1.34  1.36

# Reshape wide -> long: every year column from X1960 through X2015 becomes
# its own row, with the former column name stored in `year` and the cell
# value stored in `fertility`.
new_tidy_data <- wide_data %>%
  pivot_longer(
    cols = X1960:X2015,
    names_to = "year",
    values_to = "fertility"
  )
new_tidy_data

# A tibble: 112 × 3
   country year  fertility
   <chr>   <chr>     <dbl>
 1 Germany X1960      2.41
 2 Germany X1961      2.44
 3 Germany X1962      2.47
 4 Germany X1963      2.49
 5 Germany X1964      2.49
 6 Germany X1965      2.48
 7 Germany X1966      2.44
 8 Germany X1967      2.37
 9 Germany X1968      2.28
10 Germany X1969      2.17
# ℹ 102 more rows

# Drop the first character of each year label (the "X" prefix seen in the
# column names), keeping everything from position 2 onward.
new_tidy_data <- new_tidy_data %>%
  mutate(year = str_sub(year, start = 2))
new_tidy_data

# A tibble: 112 × 3
   country year  fertility
   <chr>   <chr>     <dbl>
 1 Germany 1960       2.41
 2 Germany 1961       2.44
 3 Germany 1962       2.47
 4 Germany 1963       2.49
 5 Germany 1964       2.49
 6 Germany 1965       2.48
 7 Germany 1966       2.44
 8 Germany 1967       2.37
 9 Germany 1968       2.28
10 Germany 1969       2.17
# ℹ 102 more rows

# Convert the year labels from character to integer.
new_tidy_data <- mutate(new_tidy_data, year = as.integer(year))

# Reverse the reshape: spread the long table back out so that each year
# becomes a column holding that year's fertility value.
new_wide_data <- pivot_wider(
  new_tidy_data,
  names_from = year,
  values_from = fertility
)

# Load the dslabs example datasets.
# NOTE(review): `results_us_election_2016` is used later without its own
# data() call; presumably it is provided together with
# `polls_us_election_2016` -- confirm against the dslabs documentation.
data(murders)
data(polls_us_election_2016)

# State name and population for the first six rows of `murders`.
tab1 <- murders %>%
  slice(1:6) %>%
  select(state, population)
tab1

       state population
1    Alabama    4779736
2     Alaska     710231
3    Arizona    6392017
4   Arkansas    2915918
5 California   37253956
6   Colorado    5029196

# Electoral votes for a hand-picked set of six states; rows keep the order
# in which they appear in `results_us_election_2016`.
tab2 <- results_us_election_2016 %>%
  filter(
    state %in% c(
      "Alabama", "Alaska", "Arizona",
      "California", "Connecticut", "Delaware"
    )
  ) %>%
  select(state, electoral_votes)
tab2

        state electoral_votes
1  California              55
2     Arizona              11
3     Alabama               9
4 Connecticut               7
5      Alaska               3
6    Delaware               3