library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
MyData <- read_excel("C:/Users/owner/OneDrive - USNH/Desktop/PSU_DAT3000_IntroToDA/00_data/MyData.xlsx")
View(MyData)
MyData %>%
pivot_longer(c("office", "state"), names_to = "representative", values_to = "homestate")
## # A tibble: 64,904 × 20
## year state_po state_fips state_cen state_ic district stage runoff special
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <lgl> <lgl>
## 1 1976 AL 1 63 41 1 GEN FALSE FALSE
## 2 1976 AL 1 63 41 1 GEN FALSE FALSE
## 3 1976 AL 1 63 41 1 GEN FALSE FALSE
## 4 1976 AL 1 63 41 1 GEN FALSE FALSE
## 5 1976 AL 1 63 41 1 GEN FALSE FALSE
## 6 1976 AL 1 63 41 1 GEN FALSE FALSE
## 7 1976 AL 1 63 41 2 GEN FALSE FALSE
## 8 1976 AL 1 63 41 2 GEN FALSE FALSE
## 9 1976 AL 1 63 41 2 GEN FALSE FALSE
## 10 1976 AL 1 63 41 2 GEN FALSE FALSE
## # ℹ 64,894 more rows
## # ℹ 11 more variables: candidate <chr>, party <chr>, writein <lgl>, mode <chr>,
## # candidatevotes <dbl>, totalvotes <dbl>, unofficial <lgl>, version <dbl>,
## # fusion_ticket <lgl>, representative <chr>, homestate <chr>
MyData %>%
pivot_wider(names_from = totalvotes, values_from = candidatevotes)
## Warning: Values from `candidatevotes` are not uniquely identified; output will contain
## list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
## {data} |>
## dplyr::summarise(n = dplyr::n(), .by = c(year, state, state_po, state_fips,
## state_cen, state_ic, office, district, stage, runoff, special, candidate,
## party, writein, mode, unofficial, version, fusion_ticket, totalvotes)) |>
## dplyr::filter(n > 1L)
## # A tibble: 32,347 × 10,062
## year state state_po state_fips state_cen state_ic office district stage
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 2 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 3 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 4 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 5 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 6 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 7 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 8 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 9 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 10 1976 ALABAMA AL 1 63 41 US HOUSE 4 GEN
## # ℹ 32,337 more rows
## # ℹ 10,053 more variables: runoff <lgl>, special <lgl>, candidate <chr>,
## # party <chr>, writein <lgl>, mode <chr>, unofficial <lgl>, version <dbl>,
## # fusion_ticket <lgl>, `157170` <list>, `156362` <list>, `108048` <list>,
## # `176022` <list>, `113560` <list>, `162518` <list>, `110501` <list>,
## # `118208` <list>, `168119` <list>, `182128` <list>, `187165` <list>,
## # `191590` <list>, `168782` <list>, `167607` <list>, `1` <list>, …
MyData %>%
unite(col = "congressional district", c(state,district), sep = " ")
## # A tibble: 32,452 × 19
## year `congressional district` state_po state_fips state_cen state_ic office
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr>
## 1 1976 ALABAMA 1 AL 1 63 41 US HOU…
## 2 1976 ALABAMA 1 AL 1 63 41 US HOU…
## 3 1976 ALABAMA 1 AL 1 63 41 US HOU…
## 4 1976 ALABAMA 2 AL 1 63 41 US HOU…
## 5 1976 ALABAMA 2 AL 1 63 41 US HOU…
## 6 1976 ALABAMA 2 AL 1 63 41 US HOU…
## 7 1976 ALABAMA 3 AL 1 63 41 US HOU…
## 8 1976 ALABAMA 3 AL 1 63 41 US HOU…
## 9 1976 ALABAMA 3 AL 1 63 41 US HOU…
## 10 1976 ALABAMA 4 AL 1 63 41 US HOU…
## # ℹ 32,442 more rows
## # ℹ 12 more variables: stage <chr>, runoff <lgl>, special <lgl>,
## # candidate <chr>, party <chr>, writein <lgl>, mode <chr>,
## # candidatevotes <dbl>, totalvotes <dbl>, unofficial <lgl>, version <dbl>,
## # fusion_ticket <lgl>
MyData %>%
separate(col = "office", into = c("coutry", "position"), sep = "/")
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 32452 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## # A tibble: 32,452 × 21
## year state state_po state_fips state_cen state_ic coutry position district
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 1976 ALABAMA AL 1 63 41 US HO… <NA> 1
## 2 1976 ALABAMA AL 1 63 41 US HO… <NA> 1
## 3 1976 ALABAMA AL 1 63 41 US HO… <NA> 1
## 4 1976 ALABAMA AL 1 63 41 US HO… <NA> 2
## 5 1976 ALABAMA AL 1 63 41 US HO… <NA> 2
## 6 1976 ALABAMA AL 1 63 41 US HO… <NA> 2
## 7 1976 ALABAMA AL 1 63 41 US HO… <NA> 3
## 8 1976 ALABAMA AL 1 63 41 US HO… <NA> 3
## 9 1976 ALABAMA AL 1 63 41 US HO… <NA> 3
## 10 1976 ALABAMA AL 1 63 41 US HO… <NA> 4
## # ℹ 32,442 more rows
## # ℹ 12 more variables: stage <chr>, runoff <lgl>, special <lgl>,
## # candidate <chr>, party <chr>, writein <lgl>, mode <chr>,
## # candidatevotes <dbl>, totalvotes <dbl>, unofficial <lgl>, version <dbl>,
## # fusion_ticket <lgl>